From 9d4b4436a0fee24f0c6c8d8b6ff0dfd48a2c6539 Mon Sep 17 00:00:00 2001 From: Henrique Chaves Date: Wed, 20 Sep 2023 22:44:31 +0200 Subject: [PATCH 1/9] Update ngrok token CLI param --- python-client/docs/cli/ngrok/index.rst | 2 +- .../docs/guides/installation_app/index.rst | 2 +- ...amazon_review_classification_sklearn.ipynb | 5382 +++++----- .../docs/reference/notebooks/api_model.ipynb | 290 +- .../notebooks/cancer_detection_xgboost.ipynb | 810 +- .../notebooks/churn_prediction_lgbm.ipynb | 1060 +- .../reference/notebooks/credit_scoring.ipynb | 1044 +- .../drug_classification_sklearn.ipynb | 2720 +++--- .../enron_email_classification_sklearn.ipynb | 1132 +-- .../notebooks/example_notebook.ipynb | 5538 +++++------ .../fake_real_news_classification.ipynb | 4182 ++++---- .../notebooks/hotel_text_regression.ipynb | 3868 ++++---- ...aud_detection_adversarial_validation.ipynb | 5574 +++++------ .../notebooks/insurance_prediction_lgbm.ipynb | 3870 ++++---- ...al_transcript_classification_sklearn.ipynb | 8682 ++++++++--------- ...iment_classification_pytorch_sklearn.ipynb | 4790 ++++----- .../newspaper_classification_pytorch.ipynb | 1200 +-- ...tripadvisor_sentiment_classification.ipynb | 6400 ++++++------ .../notebooks/wage_classification.ipynb | 4854 ++++----- python-client/giskard/commands/cli_server.py | 2 +- 20 files changed, 30701 insertions(+), 30701 deletions(-) diff --git a/python-client/docs/cli/ngrok/index.rst b/python-client/docs/cli/ngrok/index.rst index a6b91ed79e..b197722e3d 100644 --- a/python-client/docs/cli/ngrok/index.rst +++ b/python-client/docs/cli/ngrok/index.rst @@ -22,7 +22,7 @@ Copy the following key: 3. Expose the giskard server ^^^^^^^^^^^^^^^^^^^^^^^ -Now you can run :code:`giskard server expose --token ` which should prompt you with the following instructions::: +Now you can run :code:`giskard server expose --ngrok-token ` which should prompt you with the following instructions::: Exposing Giskard Server to the internet... Giskard Server is now exposed to the internet. diff --git a/python-client/docs/guides/installation_app/index.rst b/python-client/docs/guides/installation_app/index.rst index 3508a3bd87..24d07acd4b 100644 --- a/python-client/docs/guides/installation_app/index.rst +++ b/python-client/docs/guides/installation_app/index.rst @@ -136,7 +136,7 @@ Giskard executes your model using a worker that runs the model directly in your .. code-block:: sh - giskard server expose --token + giskard server expose --ngrok-token Read the flowing `instructions `_ in order to get the :code:`ngrok_API_token`. Then run the below 4 lines of code in a **cell of your Colab notebook**: diff --git a/python-client/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb b/python-client/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb index 48237215cd..41f9c0873a 100644 --- a/python-client/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb @@ -1,2701 +1,2701 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "# Amazon reviews classification [sklearn]\n", - "* Binary classification of product's review 'helpfulness' (quality).\n", - "* Reference notebook: \n", - "* Dataset: \n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a simple classification model with the Amazon reviews dataset. 
Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T10:23:41.394690Z", - "start_time": "2023-08-21T10:23:41.372632Z" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "# Amazon reviews classification [sklearn]\n", + "* Binary classification of product's review 'helpfulness' (quality).\n", + "* Reference notebook: \n", + "* Dataset: \n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a simple classification model with the Amazon reviews dataset. Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "!pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T10:23:41.794719Z", - "start_time": "2023-08-21T10:23:41.787341Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install Giskard" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "import os\n", - "import string\n", - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.preprocessing import FunctionTransformer\n", - "from sklearn.feature_extraction.text import TfidfVectorizer\n", - "from sklearn.metrics import roc_auc_score, balanced_accuracy_score\n", - "\n", - "import giskard\n", - "from giskard import Dataset, Model, GiskardClient, testing\n", - "\n", - "# Disable chained assignment warning.\n", - "pd.options.mode.chained_assignment = None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define constants" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T10:23:42.247455Z", - "start_time": "2023-08-21T10:23:42.215098Z" - } - }, - "outputs": [], - "source": [ - "# Constants.\n", - "RANDOM_SEED = 0\n", - "TEST_RATIO = 0.2\n", - "\n", - "TARGET_THRESHOLD = 0.5\n", - "TARGET_NAME = 
\"isHelpful\"\n", - "\n", - "# Paths.\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/amazon_review_dataset/reviews.json\"\n", - "DATA_PATH = Path.home() / \".giskard\" / \"amazon_review_dataset\" / \"reviews.json\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Load and preprocess data" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T10:23:43.346692Z", - "start_time": "2023-08-21T10:23:43.336322Z" + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T10:23:41.394690Z", + "start_time": "2023-08-21T10:23:41.372632Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "!pip install \"giskard>=2.0.0b\" -U" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def fetch_from_ftp(url: str, file: Path) -> None:\n", - " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " print(f\"Downloading data from {url}\")\n", - " urlretrieve(url, file)\n", - "\n", - " print(f\"Data was loaded!\")\n", - "\n", - "\n", - "def download_data(**kwargs) -> pd.DataFrame:\n", - " \"\"\"Download the dataset using URL.\"\"\"\n", - " fetch_from_ftp(DATA_URL, DATA_PATH)\n", - " _df = pd.read_json(DATA_PATH, lines=True, **kwargs)\n", - " return _df\n", - "\n", - "\n", - "def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Perform data-preprocessing steps.\"\"\"\n", - " print(f\"Start data preprocessing...\")\n", - "\n", - " # Select columns.\n", - " df = df[[\"reviewText\", \"helpful\"]]\n", - "\n", - " # Remove Null-characters (x00) from the dataset.\n", - " df.reviewText = df.reviewText.apply(lambda x: x.replace(\"\\x00\", \"\"))\n", - "\n", - " # Extract numbers of helpful and total votes.\n", - " df['helpful_ratings'] = df.helpful.apply(lambda x: x[0])\n", - " df['total_ratings'] = df.helpful.apply(lambda x: x[1])\n", - "\n", - " # Filter unreasonable comments.\n", - " df = df[df.total_ratings > 10]\n", - "\n", - " # Create target column.\n", - " df[TARGET_NAME] = np.where((df.helpful_ratings / df.total_ratings) > TARGET_THRESHOLD, 1, 0).astype(int)\n", - "\n", - " # Delete columns we don't need anymore.\n", - " df.drop(columns=[\"helpful\", 'helpful_ratings', 'total_ratings'], inplace=True)\n", - "\n", - " print(\"Data preprocessing finished!\")\n", - "\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": { - "ExecuteTime": { - "start_time": "2023-08-21T10:23:43.634130Z" - } - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data was loaded!\n", - "Start data preprocessing...\n", - "Data preprocessing finished!\n" - ] - } - ], - "source": [ - "reviews_df = download_data()\n", - "reviews_df = preprocess_data(reviews_df)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Train-test split" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": { - "ExecuteTime": { - "start_time": "2023-08-21T10:24:08.127596Z" - } - }, - "outputs": [], - "source": [ - "X_train, X_test, y_train, y_test = train_test_split(reviews_df[[\"reviewText\"]], reviews_df[TARGET_NAME],\n", - " test_size=TEST_RATIO, 
random_state=RANDOM_SEED, \n", - " stratify=reviews_df[TARGET_NAME])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Wrap dataset with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": { - "ExecuteTime": { - "start_time": "2023-08-21T10:24:08.164230Z" - } - }, - "outputs": [], - "source": [ - "raw_data = pd.concat([X_test, y_test], axis=1)\n", - "wrapped_data = Dataset(\n", - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=TARGET_NAME, # Ground truth variable.\n", - " name=\"reviews\", # Optional.\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Define preprocessing pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": { - "ExecuteTime": { - "start_time": "2023-08-21T10:24:08.192882Z" - } - }, - "outputs": [], - "source": [ - "def remove_punctuation(x):\n", - " \"\"\"Remove punctuation from input string.\"\"\"\n", - " x = x.reviewText.apply(lambda row: row.translate(str.maketrans('', '', string.punctuation)))\n", - " return x\n", - "\n", - "\n", - "preprocessor = Pipeline(steps=[\n", - " (\"punctuation\", FunctionTransformer(remove_punctuation)),\n", - " (\"vectorizer\", TfidfVectorizer(stop_words='english', min_df=0.01))\n", - "])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Build estimator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline = Pipeline(steps=[\n", - " (\"preprocessor\", preprocessor),\n", - " (\"estimator\", LogisticRegression(random_state=RANDOM_SEED, class_weight=\"balanced\"))\n", - "])\n", - "\n", - "pipeline.fit(X_train, y_train)\n", - "\n", - "# ROC-AUC score.\n", - "train_metric = roc_auc_score(y_train, pipeline.predict_proba(X_train)[:, 1])\n", - "test_metric = roc_auc_score(y_test, pipeline.predict_proba(X_test)[:, 1])\n", - "print(f\"Train ROC-AUC score: {train_metric:.2f}\")\n", - "print(f\"Test ROC-AUC score: {test_metric:.2f}\")\n", - "\n", - "# Balanced accuracy to account for imbalanced targets.\n", - "b_acc_train = balanced_accuracy_score(y_train, pipeline.predict(X_train))\n", - "b_acc_test = balanced_accuracy_score(y_test, pipeline.predict(X_test))\n", - "print(f\"Train balanced accuracy: {b_acc_train:.2f}\")\n", - "print(f\"Test balanced accuracy: {b_acc_test:.2f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Wrap model with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "wrapped_model = Model(\n", - " model=pipeline.predict_proba, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"review_helpfulness_predictor\", # Optional.\n", - " classification_labels=[0, 1], # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=[\"reviewText\"], # Default: all columns of your dataset.\n", - " # classification_threshold=0.5 # Default: 0.5.\n", - ")\n", - "\n", - "# Validate wrapped model.\n", - "wrapped_predict = 
wrapped_model.predict(wrapped_data).raw[:, 1]\n", - "wrapped_test_metric = roc_auc_score(y_test, wrapped_predict)\n", - "\n", - "print(f\"Wrapped Test ROC-AUC score: {wrapped_test_metric:.2f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Scan your model to find vulnerabilities\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results = giskard.scan(model=wrapped_model, dataset=wrapped_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T10:58:12.495312Z", - "start_time": "2023-08-21T10:58:11.280446Z" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T10:23:41.794719Z", + "start_time": "2023-08-21T10:23:41.787341Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "import os\n", + "import string\n", + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import FunctionTransformer\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.metrics import roc_auc_score, balanced_accuracy_score\n", + "\n", + "import giskard\n", + "from giskard import Dataset, Model, GiskardClient, testing\n", + "\n", + "# Disable chained assignment warning.\n", + "pd.options.mode.chained_assignment = None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define constants" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T10:23:42.247455Z", + "start_time": "2023-08-21T10:23:42.215098Z" + } + }, + "outputs": [], + "source": [ + "# Constants.\n", + "RANDOM_SEED = 0\n", + "TEST_RATIO = 0.2\n", + "\n", + "TARGET_THRESHOLD = 0.5\n", + "TARGET_NAME = \"isHelpful\"\n", + "\n", + "# Paths.\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/amazon_review_dataset/reviews.json\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"amazon_review_dataset\" / \"reviews.json\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Load and preprocess data" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T10:23:43.346692Z", + "start_time": "2023-08-21T10:23:43.336322Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", + " if not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " print(f\"Downloading data from {url}\")\n", + " urlretrieve(url, file)\n", + "\n", + " print(f\"Data 
was loaded!\")\n", + "\n", + "\n", + "def download_data(**kwargs) -> pd.DataFrame:\n", + " \"\"\"Download the dataset using URL.\"\"\"\n", + " fetch_from_ftp(DATA_URL, DATA_PATH)\n", + " _df = pd.read_json(DATA_PATH, lines=True, **kwargs)\n", + " return _df\n", + "\n", + "\n", + "def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"Perform data-preprocessing steps.\"\"\"\n", + " print(f\"Start data preprocessing...\")\n", + "\n", + " # Select columns.\n", + " df = df[[\"reviewText\", \"helpful\"]]\n", + "\n", + " # Remove Null-characters (x00) from the dataset.\n", + " df.reviewText = df.reviewText.apply(lambda x: x.replace(\"\\x00\", \"\"))\n", + "\n", + " # Extract numbers of helpful and total votes.\n", + " df['helpful_ratings'] = df.helpful.apply(lambda x: x[0])\n", + " df['total_ratings'] = df.helpful.apply(lambda x: x[1])\n", + "\n", + " # Filter unreasonable comments.\n", + " df = df[df.total_ratings > 10]\n", + "\n", + " # Create target column.\n", + " df[TARGET_NAME] = np.where((df.helpful_ratings / df.total_ratings) > TARGET_THRESHOLD, 1, 0).astype(int)\n", + "\n", + " # Delete columns we don't need anymore.\n", + " df.drop(columns=[\"helpful\", 'helpful_ratings', 'total_ratings'], inplace=True)\n", + "\n", + " print(\"Data preprocessing finished!\")\n", + "\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "ExecuteTime": { + "start_time": "2023-08-21T10:23:43.634130Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data was loaded!\n", + "Start data preprocessing...\n", + "Data preprocessing finished!\n" + ] + } + ], + "source": [ + "reviews_df = download_data()\n", + "reviews_df = preprocess_data(reviews_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Train-test split" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "ExecuteTime": { + "start_time": "2023-08-21T10:24:08.127596Z" + } + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(reviews_df[[\"reviewText\"]], reviews_df[TARGET_NAME],\n", + " test_size=TEST_RATIO, random_state=RANDOM_SEED, \n", + " stratify=reviews_df[TARGET_NAME])" + ] }, - "collapsed": false - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Wrap dataset with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "ExecuteTime": { + "start_time": "2023-08-21T10:24:08.164230Z" + } + }, + "outputs": [], + "source": [ + "raw_data = pd.concat([X_test, y_test], axis=1)\n", + "wrapped_data = Dataset(\n", + " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=TARGET_NAME, # Ground truth variable.\n", + " name=\"reviews\", # Optional.\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Define preprocessing pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "ExecuteTime": { + "start_time": "2023-08-21T10:24:08.192882Z" + } + }, + "outputs": [], + "source": [ + "def remove_punctuation(x):\n", + " \"\"\"Remove punctuation from input string.\"\"\"\n", + " x = x.reviewText.apply(lambda row: row.translate(str.maketrans('', '', 
string.punctuation)))\n", + " return x\n", + "\n", + "\n", + "preprocessor = Pipeline(steps=[\n", + " (\"punctuation\", FunctionTransformer(remove_punctuation)),\n", + " (\"vectorizer\", TfidfVectorizer(stop_words='english', min_df=0.01))\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline = Pipeline(steps=[\n", + " (\"preprocessor\", preprocessor),\n", + " (\"estimator\", LogisticRegression(random_state=RANDOM_SEED, class_weight=\"balanced\"))\n", + "])\n", + "\n", + "pipeline.fit(X_train, y_train)\n", + "\n", + "# ROC-AUC score.\n", + "train_metric = roc_auc_score(y_train, pipeline.predict_proba(X_train)[:, 1])\n", + "test_metric = roc_auc_score(y_test, pipeline.predict_proba(X_test)[:, 1])\n", + "print(f\"Train ROC-AUC score: {train_metric:.2f}\")\n", + "print(f\"Test ROC-AUC score: {test_metric:.2f}\")\n", + "\n", + "# Balanced accuracy to account for imbalanced targets.\n", + "b_acc_train = balanced_accuracy_score(y_train, pipeline.predict(X_train))\n", + "b_acc_test = balanced_accuracy_score(y_test, pipeline.predict(X_test))\n", + "print(f\"Train balanced accuracy: {b_acc_train:.2f}\")\n", + "print(f\"Test balanced accuracy: {b_acc_test:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Wrap model with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wrapped_model = Model(\n", + " model=pipeline.predict_proba, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"review_helpfulness_predictor\", # Optional.\n", + " classification_labels=[0, 1], # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=[\"reviewText\"], # Default: all columns of your dataset.\n", + " # classification_threshold=0.5 # Default: 0.5.\n", + ")\n", + "\n", + "# Validate wrapped model.\n", + "wrapped_predict = wrapped_model.predict(wrapped_data).raw[:, 1]\n", + "wrapped_test_metric = roc_auc_score(y_test, wrapped_predict)\n", + "\n", + "print(f\"Wrapped Test ROC-AUC score: {wrapped_test_metric:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scan your model to find vulnerabilities\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = giskard.scan(model=wrapped_model, dataset=wrapped_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T10:58:12.495312Z", + "start_time": "2023-08-21T10:58:11.280446Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "* Compare models to decide which model to promote\n", + "* Debug your tests to diagnose the issues\n", + "* Create more domain-specific tests that are integrating business feedback\n", + "* Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/api_model.ipynb b/python-client/docs/reference/notebooks/api_model.ipynb index 87e0f056a3..4582e43be3 100644 --- a/python-client/docs/reference/notebooks/api_model.ipynb +++ b/python-client/docs/reference/notebooks/api_model.ipynb @@ -1,147 +1,147 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "f6dc35b9", - "metadata": {}, - "source": [ - "# Connecting Giskard to an AI model hosted by API" - ] + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "f6dc35b9", + "metadata": {}, + "source": [ + "# Connecting Giskard to an AI model hosted by API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0361f8a8", + "metadata": {}, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fd6208a2", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import requests\n", + "\n", + "import giskard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a30c144", + "metadata": {}, + "outputs": [], + "source": [ + "def prediction_function(input_data: pd.DataFrame):\n", + " # Set up the API endpoint URL and parameters\n", + " api_endpoint = \"https://api.example.com/predict\"\n", + " api_params = {\"input\": input_data}\n", + "\n", + " # Send a GET request to the API endpoint and get the response\n", + " response = requests.get(api_endpoint, params=api_params)\n", + "\n", + " # Raise error if an error has occurred\n", + " response.raise_for_status()\n", + "\n", + " # Extract the predictions from the JSON response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23e3f716", + "metadata": {}, + "outputs": [], + "source": [ + "# Wrap your model with Giskard.Model:\n", + "giskard_model = giskard.Model(\n", + " model=prediction_function,\n", + " model_type=\"classification\",\n", + " feature_names=[\"feature1\", \"feature2\", \"feature3\"],\n", + " classification_labels=[\"label1\", \"label2\"],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5028344d", + "metadata": {}, + "outputs": [], + "source": [ + "# Wrap your Pandas DataFrame with Giskard.Dataset, containing examples such as:\n", + "giskard_dataset = giskard.Dataset(df=..., target=\"target\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1b5f71b", + "metadata": {}, + "outputs": [], + "source": [ + "# Then apply the scan\n", + "results = giskard.scan(giskard_model, giskard_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "1e3bbe54", + "metadata": {}, + "outputs": [], + "source": [ + "display(results)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" + ], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } }, - { - "cell_type": "code", - "execution_count": null, - "id": "0361f8a8", - "metadata": {}, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "fd6208a2", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import requests\n", - "\n", - "import giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a30c144", - "metadata": {}, - "outputs": [], - "source": [ - "def prediction_function(input_data: pd.DataFrame):\n", - " # Set up the API endpoint URL and parameters\n", - " api_endpoint = \"https://api.example.com/predict\"\n", - " api_params = {\"input\": input_data}\n", - "\n", - " # Send a GET request to the API endpoint and get the response\n", - " response = requests.get(api_endpoint, params=api_params)\n", - "\n", - " # Raise error if an error has occurred\n", - " response.raise_for_status()\n", - "\n", - " # Extract the predictions from the JSON response" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23e3f716", - "metadata": {}, - "outputs": [], - "source": [ - "# Wrap your model with Giskard.Model:\n", - "giskard_model = giskard.Model(\n", - " model=prediction_function,\n", - " model_type=\"classification\",\n", - " feature_names=[\"feature1\", \"feature2\", \"feature3\"],\n", - " classification_labels=[\"label1\", \"label2\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5028344d", - "metadata": {}, - "outputs": [], - "source": [ - "# Wrap your Pandas DataFrame with Giskard.Dataset, containing examples such as:\n", - "giskard_dataset = giskard.Dataset(df=..., target=\"target\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f1b5f71b", - "metadata": {}, - "outputs": [], - "source": [ - "# Then apply the scan\n", - "results = giskard.scan(giskard_model, giskard_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e3bbe54", - "metadata": {}, - "outputs": [], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ], - "metadata": { - "collapsed": false - } - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/cancer_detection_xgboost.ipynb b/python-client/docs/reference/notebooks/cancer_detection_xgboost.ipynb index 186eeff12b..2a0b941537 100644 --- a/python-client/docs/reference/notebooks/cancer_detection_xgboost.ipynb +++ b/python-client/docs/reference/notebooks/cancer_detection_xgboost.ipynb @@ -1,409 +1,409 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "990eccb8", - "metadata": {}, - "source": [ - "# Breast cancer detection [XGBoost]\n", - "\n", - "
\n", - "What is Giskard ?\n", - "\n", - "Giskard is an open-source testing framework dedicated to ML models, ranging from tabular to LLM. [To know more about Giskard, click here](https://docs.giskard.ai/en/latest/getting-started/index.html).\n", - "
" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "40f23d1a", - "metadata": {}, - "source": [ - "By running this notebook, you'll create a whole test suite in a few lines of code. The model used here is a XGBoost classification model. It is used to detect breast cancer in a [dataset from UCI](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(diagnostic)).\n", - "\n", - "You'll learn how to:\n", - "\n", - "- Detect vulnerabilities by scanning the model\n", - "\n", - "- Generate a test suite with domain-specific tests\n", - "\n", - "- Customize your test suite by loading a test from the Giskard catalog\n", - "\n", - "- Upload your model to the Giskard server to:\n", - "\n", - " - Compare models to decide which one to promote\n", - "\n", - " - Debug your tests to diagnose issues\n", - "\n", - " - Share your results and collect business feedback from your team\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "da9a6eac", - "metadata": {}, - "source": [ - "## Install Giskard\n", - "\n", - "To see the list of Python requirements, please refer to [the documentation](https://docs.giskard.ai/en/latest/guides/installation_library/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T11:49:35.078459Z", - "start_time": "2023-08-21T11:49:35.038489Z" - } - }, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ], - "id": "eb828d6da954f51d" - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "58613d7d", - "metadata": {}, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "7d960163", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T11:49:35.361546Z", - "start_time": "2023-08-21T11:49:35.352957Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from sklearn.datasets import load_breast_cancer\n", - "from sklearn.model_selection import train_test_split\n", - "from xgboost import XGBClassifier\n", - "\n", - "import giskard" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d7cb9261", - "metadata": {}, - "source": [ - "## Import data and load it into Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "e3c3e6a5", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T11:49:35.883048Z", - "start_time": "2023-08-21T11:49:35.734501Z" - } - }, - "outputs": [], - "source": [ - "# Define constants\n", - "TARGET_COLUMN_NAME = \"target\"\n", - "\n", - "# Load data from scikit-learn resources\n", - "raw_data = load_breast_cancer(as_frame=True)\n", - "df = pd.concat([raw_data.data, raw_data.target], axis=1)\n", - "column_types = {col: \"numeric\" for col in raw_data.data.columns}\n", - "\n", - "# Wrap the dataframe into Giskard\n", - "dataset = giskard.Dataset(\n", - " df=df, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=\"target\", # Ground truth variable.\n", - " name=\"breast_cancer\", # Optional.\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "803e36d9", - "metadata": {}, - "source": [ - "## Create your model & wrap it into Giskard" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "5b964cd7", - "metadata": {}, - "source": [ - "### Train your model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd6c0986", - 
"metadata": {}, - "outputs": [], - "source": [ - "# Constants\n", - "RANDOM_SEED = 42\n", - "\n", - "# Train/test split\n", - "X_train, X_test, y_train, y_test = train_test_split(\n", - " df.loc[:, df.columns != TARGET_COLUMN_NAME],\n", - " df[TARGET_COLUMN_NAME],\n", - " random_state=RANDOM_SEED,\n", - ")\n", - "\n", - "# Train model\n", - "xgb = XGBClassifier(objective='binary:logistic')\n", - "xgb.fit(X_train, y_train)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a005a413", - "metadata": {}, - "source": [ - "### Wrap your model in Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "db6c8ef8", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T11:49:37.064080Z", - "start_time": "2023-08-21T11:49:37.019355Z" - } - }, - "outputs": [], - "source": [ - "model = giskard.Model(\n", - " model=xgb, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"breast_cancer_xgboost\", # Optional.\n", - " classification_labels=[0, 1], # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=X_test.columns, # Default: all columns of your dataset.\n", - " # classification_threshold=0.5, # Default: 0.5.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan your model to find vulnerabilities\n", - "\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." - ], - "id": "4fa6a666db37d7af" - }, - { - "cell_type": "code", - "execution_count": null, - "id": "931b100b", - "metadata": {}, - "outputs": [], - "source": [ - "results = giskard.scan(model, dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "ecb49fa5", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T11:50:22.905442Z", - "start_time": "2023-08-21T11:50:21.882067Z" - } - }, - "outputs": [ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "990eccb8", + "metadata": {}, + "source": [ + "# Breast cancer detection [XGBoost]\n", + "\n", + "
\n", + "What is Giskard ?\n", + "\n", + "Giskard is an open-source testing framework dedicated to ML models, ranging from tabular to LLM. [To know more about Giskard, click here](https://docs.giskard.ai/en/latest/getting-started/index.html).\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "40f23d1a", + "metadata": {}, + "source": [ + "By running this notebook, you'll create a whole test suite in a few lines of code. The model used here is a XGBoost classification model. It is used to detect breast cancer in a [dataset from UCI](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(diagnostic)).\n", + "\n", + "You'll learn how to:\n", + "\n", + "- Detect vulnerabilities by scanning the model\n", + "\n", + "- Generate a test suite with domain-specific tests\n", + "\n", + "- Customize your test suite by loading a test from the Giskard catalog\n", + "\n", + "- Upload your model to the Giskard server to:\n", + "\n", + " - Compare models to decide which one to promote\n", + "\n", + " - Debug your tests to diagnose issues\n", + "\n", + " - Share your results and collect business feedback from your team\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "da9a6eac", + "metadata": {}, + "source": [ + "## Install Giskard\n", + "\n", + "To see the list of Python requirements, please refer to [the documentation](https://docs.giskard.ai/en/latest/guides/installation_library/index.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-21T11:49:35.078459Z", + "start_time": "2023-08-21T11:49:35.038489Z" + } + }, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ], + "id": "eb828d6da954f51d" + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "58613d7d", + "metadata": {}, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7d960163", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T11:49:35.361546Z", + "start_time": "2023-08-21T11:49:35.352957Z" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.datasets import load_breast_cancer\n", + "from sklearn.model_selection import train_test_split\n", + "from xgboost import XGBClassifier\n", + "\n", + "import giskard" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d7cb9261", + "metadata": {}, + "source": [ + "## Import data and load it into Giskard" + ] + }, { - "data": { - "text/html": "\n" - }, - "metadata": {}, - "output_type": "display_data" + "cell_type": "code", + "execution_count": 15, + "id": "e3c3e6a5", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T11:49:35.883048Z", + "start_time": "2023-08-21T11:49:35.734501Z" + } + }, + "outputs": [], + "source": [ + "# Define constants\n", + "TARGET_COLUMN_NAME = \"target\"\n", + "\n", + "# Load data from scikit-learn resources\n", + "raw_data = load_breast_cancer(as_frame=True)\n", + "df = pd.concat([raw_data.data, raw_data.target], axis=1)\n", + "column_types = {col: \"numeric\" for col in raw_data.data.columns}\n", + "\n", + "# Wrap the dataframe into Giskard\n", + "dataset = giskard.Dataset(\n", + " df=df, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=\"target\", # Ground truth variable.\n", + " name=\"breast_cancer\", # Optional.\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "803e36d9", + "metadata": {}, + "source": [ + "## Create your model & wrap it into Giskard" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "5b964cd7", + "metadata": {}, + "source": [ + "### Train your model" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd6c0986", + "metadata": {}, + "outputs": [], + "source": [ + "# Constants\n", + "RANDOM_SEED = 42\n", + "\n", + "# Train/test split\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " df.loc[:, df.columns != TARGET_COLUMN_NAME],\n", + " df[TARGET_COLUMN_NAME],\n", + " random_state=RANDOM_SEED,\n", + ")\n", + "\n", + "# Train model\n", + "xgb = XGBClassifier(objective='binary:logistic')\n", + "xgb.fit(X_train, y_train)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a005a413", + "metadata": {}, + "source": [ + "### Wrap your model in Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "db6c8ef8", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T11:49:37.064080Z", + "start_time": "2023-08-21T11:49:37.019355Z" + } + }, + "outputs": [], + "source": [ + "model = giskard.Model(\n", + " model=xgb, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"breast_cancer_xgboost\", # Optional.\n", + " classification_labels=[0, 1], # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=X_test.columns, # Default: all columns of your dataset.\n", + " # classification_threshold=0.5, # Default: 0.5.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Scan your model to find vulnerabilities\n", + "\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." + ], + "id": "4fa6a666db37d7af" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "931b100b", + "metadata": {}, + "outputs": [], + "source": [ + "results = giskard.scan(model, dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ecb49fa5", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T11:50:22.905442Z", + "start_time": "2023-08-21T11:50:21.882067Z" + } + }, + "outputs": [ + { + "data": { + "text/html": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results) # in your notebook" + ] + }, + { + "cell_type": "markdown", + "id": "8b3343fa", + "metadata": {}, + "source": [ + "As you see above, the model may detect various vulnerabilites by displaying:\n", + "\n", + "* Data slices showing unperformance, underconfidence, overconfidence or spurious correlations\n", + "* Data transformations creating robutness or ethical issues\n", + "* Examples making some tests fail\n" + ] + }, + { + "cell_type": "markdown", + "id": "0667bdc9", + "metadata": {}, + "source": [ + "## Generate a test suite from the Scan\n", + "\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the [Test your ML Model](https://docs.giskard.ai/en/latest/guides/test-suite/index.html) page." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bea736a9", + "metadata": {}, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "\n", + "# You can run the test suite locally to verify that it reproduces the issues\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "id": "882f4638", + "metadata": {}, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog" + ] + }, + { + "cell_type": "markdown", + "id": "502a0767", + "metadata": {}, + "source": [ + "The Giskard open source catalog will enable to load:\n", + "\n", + "* **Tests** such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* **Slicing functions** such as detectors of toxicity, hate, emotion, etc\n", + "* **Transformation functions** such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test ([test_right_label](https://docs.giskard.ai/en/latest/reference/tests/statistic.html#giskard.testing.test_right_label)) that checks if a given row (the first example) has the right label. For more examples of tests and functions, refer to the [Giskard catalog](https://docs.giskard.ai/en/latest/guides/catalog/index.html)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0c3a076", + "metadata": {}, + "outputs": [], + "source": [ + "# For the test_right_label test we are adding, all the parameters are specified except model\n", + "# This means that we will need to specify model everytime we run the suite: model is a global parameter of the suite\n", + "suite = test_suite \\\n", + " .add_test(\n", + " giskard.testing.test_right_label(dataset=dataset.iloc[[1]], classification_label=\"yes\", threshold=1)).run()" + ] + }, + { + "cell_type": "markdown", + "id": "cf824254", + "metadata": {}, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "
\n", + "Install Giskard Server\n", + "\n", + "To upload your suite to the Giskard Server you must first run the Giskard Server. Refer to the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html).\n", + "
\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "- Compare models to decide which model to promote\n", + "- Debug your tests to diagnose the issues\n", + "- Create more domain-specific tests that are integrating business feedback\n", + "- Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8efd6bf3", + "metadata": {}, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "client = giskard.GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" + ], + "metadata": { + "collapsed": false + }, + "id": "193983c206c0103f" + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" } - ], - "source": [ - "display(results) # in your notebook" - ] - }, - { - "cell_type": "markdown", - "id": "8b3343fa", - "metadata": {}, - "source": [ - "As you see above, the model may detect various vulnerabilites by displaying:\n", - "\n", - "* Data slices showing unperformance, underconfidence, overconfidence or spurious correlations\n", - "* Data transformations creating robutness or ethical issues\n", - "* Examples making some tests fail\n" - ] - }, - { - "cell_type": "markdown", - "id": "0667bdc9", - "metadata": {}, - "source": [ - "## Generate a test suite from the Scan\n", - "\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the [Test your ML Model](https://docs.giskard.ai/en/latest/guides/test-suite/index.html) page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bea736a9", - "metadata": {}, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "\n", - "# You can run the test suite locally to verify that it reproduces the issues\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "id": "882f4638", - "metadata": {}, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog" - ] - }, - { - "cell_type": "markdown", - "id": "502a0767", - "metadata": {}, - "source": [ - "The Giskard open source catalog will enable to load:\n", - "\n", - "* **Tests** such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* **Slicing functions** such as detectors of toxicity, hate, emotion, etc\n", - "* **Transformation functions** such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test ([test_right_label](https://docs.giskard.ai/en/latest/reference/tests/statistic.html#giskard.testing.test_right_label)) that checks if a given row (the first example) has the right label. For more examples of tests and functions, refer to the [Giskard catalog](https://docs.giskard.ai/en/latest/guides/catalog/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b0c3a076", - "metadata": {}, - "outputs": [], - "source": [ - "# For the test_right_label test we are adding, all the parameters are specified except model\n", - "# This means that we will need to specify model everytime we run the suite: model is a global parameter of the suite\n", - "suite = test_suite \\\n", - " .add_test(\n", - " giskard.testing.test_right_label(dataset=dataset.iloc[[1]], classification_label=\"yes\", threshold=1)).run()" - ] - }, - { - "cell_type": "markdown", - "id": "cf824254", - "metadata": {}, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "
\n", - "Install Giskard Server\n", - "\n", - "To upload your suite to the Giskard Server you must first run the Giskard Server. Refer to the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html).\n", - "
\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "- Compare models to decide which model to promote\n", - "- Debug your tests to diagnose the issues\n", - "- Create more domain-specific tests that are integrating business feedback\n", - "- Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8efd6bf3", - "metadata": {}, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "client = giskard.GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ], - "metadata": { - "collapsed": false - }, - "id": "193983c206c0103f" - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/churn_prediction_lgbm.ipynb b/python-client/docs/reference/notebooks/churn_prediction_lgbm.ipynb index 492d3c77a0..00e2ddf7ac 100644 --- a/python-client/docs/reference/notebooks/churn_prediction_lgbm.ipynb +++ b/python-client/docs/reference/notebooks/churn_prediction_lgbm.ipynb @@ -1,535 +1,535 @@ { - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Customer churn prediction [LGBM]\n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is an LGBM classification model, which predicts customer churn in the cell company. Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Install Giskard" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 4, - "outputs": [], - "source": [ - "!pip install \"giskard>=2.0.0b\" -U" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T10:12:25.295802Z", - "start_time": "2023-08-22T10:12:25.284903Z" - } - } - }, - { - "cell_type": "markdown", - "metadata": { - "cell_id": "e8d609f32d5243dd917cc3104599b8d8", - "deepnote_app_coordinates": { - "h": 5, - "w": 12, - "x": 0, - "y": 12 - }, - "deepnote_cell_height": 230, - "deepnote_cell_type": "markdown", - "id": "WNI85koE7xbX", - "pycharm": { - "name": "#%% md\n" - }, - "tags": [] - }, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-22T10:12:36.626998Z", - "start_time": "2023-08-22T10:12:36.583273Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from lightgbm import LGBMClassifier\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.metrics import accuracy_score\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "from giskard import Dataset, Model, scan, GiskardClient, testing" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Define constants" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 6, - "outputs": [], - "source": [ - "# Constants.\n", - "RANDOM_SEED = 123\n", - "\n", - "TARGET_COLUMN_NAME = \"Churn\"\n", - 
"\n", - "COLUMN_TYPES = {'gender': \"category\",\n", - " 'SeniorCitizen': \"category\",\n", - " 'Partner': \"category\",\n", - " 'Dependents': \"category\",\n", - " 'tenure': \"numeric\",\n", - " 'PhoneService': \"category\",\n", - " 'MultipleLines': \"category\",\n", - " 'InternetService': \"category\",\n", - " 'OnlineSecurity': \"category\",\n", - " 'OnlineBackup': \"category\",\n", - " 'DeviceProtection': \"category\",\n", - " 'TechSupport': \"category\",\n", - " 'StreamingTV': \"category\",\n", - " 'StreamingMovies': \"category\",\n", - " 'Contract': \"category\",\n", - " 'PaperlessBilling': \"category\",\n", - " 'PaymentMethod': \"category\",\n", - " 'MonthlyCharges': \"numeric\",\n", - " 'TotalCharges': \"numeric\",\n", - " TARGET_COLUMN_NAME: \"category\"}\n", - "\n", - "FEATURE_TYPES = {i:COLUMN_TYPES[i] for i in COLUMN_TYPES if i != TARGET_COLUMN_NAME}\n", - "\n", - "COLUMNS_TO_SCALE = [key for key in FEATURE_TYPES.keys() if FEATURE_TYPES[key] == \"numeric\"]\n", - "COLUMNS_TO_ENCODE = [key for key in FEATURE_TYPES.keys() if FEATURE_TYPES[key] == \"category\"]\n", - "\n", - "# Paths.\n", - "DATASET_URL = \"https://raw.githubusercontent.com/Giskard-AI/examples/main/datasets/WA_Fn-UseC_-Telco-Customer-Churn.csv\"" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T10:12:37.122948Z", - "start_time": "2023-08-22T10:12:37.103510Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Dataset preparation" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "### Load and preprocess data" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": false, - "ExecuteTime": { - "end_time": "2023-08-22T10:12:38.794275Z", - "start_time": "2023-08-22T10:12:38.247656Z" - } - }, - "outputs": [], - "source": [ - "def preprocess(df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Perform data-preprocessing steps.\"\"\"\n", - " df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')\n", - " df = df.dropna()\n", - " df = df.drop(columns='customerID')\n", - " df['PaymentMethod'] = df['PaymentMethod'].str.replace(' (automatic)', '', regex=False)\n", - " return df\n", - "\n", - "\n", - "churn_df = pd.read_csv(DATASET_URL)\n", - "churn_df = preprocess(churn_df)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Train-test split" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 8, - "outputs": [], - "source": [ - "X_train, X_test, Y_train, Y_test = train_test_split(churn_df.drop(columns=TARGET_COLUMN_NAME), \n", - " churn_df[TARGET_COLUMN_NAME],\n", - " random_state=RANDOM_SEED)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T10:12:39.619478Z", - "start_time": "2023-08-22T10:12:39.552589Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Wrap dataset with Giskard" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 9, - "outputs": [], - "source": [ - "raw_data = pd.concat([X_test, Y_test], axis=1)\n", - "wrapped_data = Dataset(\n", - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable\n", - " target=TARGET_COLUMN_NAME, # Ground truth variable\n", - " name=\"Churn classification dataset\", # Optional\n", - " cat_columns=COLUMNS_TO_ENCODE # List of categorical columns. 
Optional, but is a MUST if available. Inferred automatically if not.\n", - ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T10:12:40.748778Z", - "start_time": "2023-08-22T10:12:40.649457Z" - } - } - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Define preprocessing steps" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 10, - "outputs": [], - "source": [ - "preprocessor = ColumnTransformer(transformers=[\n", - " ('num', StandardScaler(), COLUMNS_TO_SCALE),\n", - " ('cat', OneHotEncoder(handle_unknown='ignore',drop='first'), COLUMNS_TO_ENCODE)\n", - "])" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T10:12:42.530941Z", - "start_time": "2023-08-22T10:12:42.467324Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Build estimator" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('classifier', LGBMClassifier(random_state=RANDOM_SEED))\n", - "])\n", - " \n", - "# Fit model.\n", - "pipeline.fit(X_train, Y_train)\n", - "\n", - "# Evaluate model.\n", - "Y_train_pred = pipeline.predict(X_train)\n", - "train_accuracy = accuracy_score(Y_train, Y_train_pred)\n", - "\n", - "Y_test_pred = pipeline.predict(X_test)\n", - "test_accuracy = accuracy_score(Y_test, Y_test_pred)\n", - "\n", - "print(f'Train Accuracy: {train_accuracy:.2f}')\n", - "print(f'Test Accuracy: {test_accuracy:.2f}')" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Wrap model with Giskard" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "wrapped_model = Model(\n", - " model=pipeline, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"Churn classification\", # Optional\n", - " classification_labels=pipeline.classes_, # Their order MUST be identical to the prediction_function's output order\n", - " feature_names=FEATURE_TYPES.keys(), # Default: all columns of your dataset\n", - " # classification_threshold=0.5, # Default: 0.5\n", - ")\n", - "\n", - "# Validate wrapped model.\n", - "wrapped_Y_pred = wrapped_model.predict(wrapped_data).prediction\n", - "wrapped_accuracy = accuracy_score(Y_test, wrapped_Y_pred)\n", - "\n", - "print(f'Wrapped Test Accuracy: {wrapped_accuracy:.2f}')" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Scan model with Giskard\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." 
- ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "results = scan(wrapped_model, wrapped_data)" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 16, - "outputs": [ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Customer churn prediction [LGBM]\n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is an LGBM classification model, which predicts customer churn in the cell company. Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Install Giskard" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "!pip install \"giskard>=2.0.0b\" -U" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T10:12:25.295802Z", + "start_time": "2023-08-22T10:12:25.284903Z" + } + } + }, + { + "cell_type": "markdown", + "metadata": { + "cell_id": "e8d609f32d5243dd917cc3104599b8d8", + "deepnote_app_coordinates": { + "h": 5, + "w": 12, + "x": 0, + "y": 12 + }, + "deepnote_cell_height": 230, + "deepnote_cell_type": "markdown", + "id": "WNI85koE7xbX", + "pycharm": { + "name": "#%% md\n" + }, + "tags": [] + }, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T10:12:36.626998Z", + "start_time": "2023-08-22T10:12:36.583273Z" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from lightgbm import LGBMClassifier\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "from giskard import Dataset, Model, scan, GiskardClient, testing" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Define constants" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "# Constants.\n", + "RANDOM_SEED = 123\n", + "\n", + "TARGET_COLUMN_NAME = \"Churn\"\n", + "\n", + "COLUMN_TYPES = {'gender': \"category\",\n", + " 'SeniorCitizen': \"category\",\n", + " 'Partner': \"category\",\n", + " 'Dependents': \"category\",\n", + " 'tenure': \"numeric\",\n", + " 'PhoneService': \"category\",\n", + " 'MultipleLines': \"category\",\n", + " 'InternetService': \"category\",\n", + " 'OnlineSecurity': \"category\",\n", + " 'OnlineBackup': \"category\",\n", + " 'DeviceProtection': \"category\",\n", + " 'TechSupport': \"category\",\n", + " 'StreamingTV': \"category\",\n", + " 'StreamingMovies': \"category\",\n", + " 'Contract': \"category\",\n", + " 'PaperlessBilling': 
\"category\",\n", + " 'PaymentMethod': \"category\",\n", + " 'MonthlyCharges': \"numeric\",\n", + " 'TotalCharges': \"numeric\",\n", + " TARGET_COLUMN_NAME: \"category\"}\n", + "\n", + "FEATURE_TYPES = {i:COLUMN_TYPES[i] for i in COLUMN_TYPES if i != TARGET_COLUMN_NAME}\n", + "\n", + "COLUMNS_TO_SCALE = [key for key in FEATURE_TYPES.keys() if FEATURE_TYPES[key] == \"numeric\"]\n", + "COLUMNS_TO_ENCODE = [key for key in FEATURE_TYPES.keys() if FEATURE_TYPES[key] == \"category\"]\n", + "\n", + "# Paths.\n", + "DATASET_URL = \"https://raw.githubusercontent.com/Giskard-AI/examples/main/datasets/WA_Fn-UseC_-Telco-Customer-Churn.csv\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T10:12:37.122948Z", + "start_time": "2023-08-22T10:12:37.103510Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Dataset preparation" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "### Load and preprocess data" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": false, + "ExecuteTime": { + "end_time": "2023-08-22T10:12:38.794275Z", + "start_time": "2023-08-22T10:12:38.247656Z" + } + }, + "outputs": [], + "source": [ + "def preprocess(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"Perform data-preprocessing steps.\"\"\"\n", + " df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')\n", + " df = df.dropna()\n", + " df = df.drop(columns='customerID')\n", + " df['PaymentMethod'] = df['PaymentMethod'].str.replace(' (automatic)', '', regex=False)\n", + " return df\n", + "\n", + "\n", + "churn_df = pd.read_csv(DATASET_URL)\n", + "churn_df = preprocess(churn_df)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Train-test split" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [], + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(churn_df.drop(columns=TARGET_COLUMN_NAME), \n", + " churn_df[TARGET_COLUMN_NAME],\n", + " random_state=RANDOM_SEED)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T10:12:39.619478Z", + "start_time": "2023-08-22T10:12:39.552589Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Wrap dataset with Giskard" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "raw_data = pd.concat([X_test, Y_test], axis=1)\n", + "wrapped_data = Dataset(\n", + " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable\n", + " target=TARGET_COLUMN_NAME, # Ground truth variable\n", + " name=\"Churn classification dataset\", # Optional\n", + " cat_columns=COLUMNS_TO_ENCODE # List of categorical columns. Optional, but is a MUST if available. 
Inferred automatically if not.\n", + ")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T10:12:40.748778Z", + "start_time": "2023-08-22T10:12:40.649457Z" + } + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Define preprocessing steps" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [], + "source": [ + "preprocessor = ColumnTransformer(transformers=[\n", + " ('num', StandardScaler(), COLUMNS_TO_SCALE),\n", + " ('cat', OneHotEncoder(handle_unknown='ignore',drop='first'), COLUMNS_TO_ENCODE)\n", + "])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T10:12:42.530941Z", + "start_time": "2023-08-22T10:12:42.467324Z" + } + } + }, { - "data": { - "text/html": "\n" - }, - "metadata": {}, - "output_type": "display_data" + "cell_type": "markdown", + "source": [ + "### Build estimator" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " ('classifier', LGBMClassifier(random_state=RANDOM_SEED))\n", + "])\n", + " \n", + "# Fit model.\n", + "pipeline.fit(X_train, Y_train)\n", + "\n", + "# Evaluate model.\n", + "Y_train_pred = pipeline.predict(X_train)\n", + "train_accuracy = accuracy_score(Y_train, Y_train_pred)\n", + "\n", + "Y_test_pred = pipeline.predict(X_test)\n", + "test_accuracy = accuracy_score(Y_test, Y_test_pred)\n", + "\n", + "print(f'Train Accuracy: {train_accuracy:.2f}')\n", + "print(f'Test Accuracy: {test_accuracy:.2f}')" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Wrap model with Giskard" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "wrapped_model = Model(\n", + " model=pipeline, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"Churn classification\", # Optional\n", + " classification_labels=pipeline.classes_, # Their order MUST be identical to the prediction_function's output order\n", + " feature_names=FEATURE_TYPES.keys(), # Default: all columns of your dataset\n", + " # classification_threshold=0.5, # Default: 0.5\n", + ")\n", + "\n", + "# Validate wrapped model.\n", + "wrapped_Y_pred = wrapped_model.predict(wrapped_data).prediction\n", + "wrapped_accuracy = accuracy_score(Y_test, wrapped_Y_pred)\n", + "\n", + "print(f'Wrapped Test Accuracy: {wrapped_accuracy:.2f}')" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Scan model with Giskard\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "results = scan(wrapped_model, wrapped_data)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [ + { + "data": { + "text/html": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T10:14:35.907994Z", + "start_time": "2023-08-22T10:14:32.495751Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "* Compare models to decide which model to promote\n", + "* Debug your tests to diagnose the issues\n", + "* Create more domain-specific tests that are integrating business feedback\n", + "* Share your results" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" + ], + "metadata": { + "collapsed": false + } } - ], - "source": [ - "display(results)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T10:14:35.907994Z", - "start_time": "2023-08-22T10:14:32.495751Z" + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "German_credit_scoring_giskard (2).ipynb", + "provenance": [] + }, + "deepnote": { + "is_reactive": false + }, + "deepnote_app_layout": "article", + "deepnote_execution_queue": [], + "deepnote_notebook_id": "6e7ea85d-f19e-4d05-90a4-44b7668fd037", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
- ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ], - "metadata": { - "collapsed": false - } - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "German_credit_scoring_giskard (2).ipynb", - "provenance": [] - }, - "deepnote": { - "is_reactive": false - }, - "deepnote_app_layout": "article", - "deepnote_execution_queue": [], - "deepnote_notebook_id": "6e7ea85d-f19e-4d05-90a4-44b7668fd037", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.15" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/credit_scoring.ipynb b/python-client/docs/reference/notebooks/credit_scoring.ipynb index bb39136aad..40a429e08a 100644 --- a/python-client/docs/reference/notebooks/credit_scoring.ipynb +++ b/python-client/docs/reference/notebooks/credit_scoring.ipynb @@ -1,530 +1,530 @@ { - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# German credit scoring [sklearn]\n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a simple logistic regression model with the german credit scoring dataset. Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Install Giskard" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 22, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:20.563432Z", - "start_time": "2023-08-21T12:06:20.538551Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Import libraries" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "collapsed": true, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:21.107499Z", - "start_time": "2023-08-21T12:06:21.091114Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.impute import SimpleImputer\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n", - "\n", - "import giskard\n", - "from giskard import Model, Dataset, testing, GiskardClient" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Define constants" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 24, - "outputs": [], - "source": [ - "# Constants.\n", - 
"COLUMN_TYPES = {\n", - " \"account_check_status\": \"category\",\n", - " \"duration_in_month\": \"numeric\",\n", - " \"credit_history\": \"category\",\n", - " \"purpose\": \"category\",\n", - " \"credit_amount\": \"numeric\",\n", - " \"savings\": \"category\",\n", - " \"present_employment_since\": \"category\",\n", - " \"installment_as_income_perc\": \"numeric\",\n", - " \"sex\": \"category\",\n", - " \"personal_status\": \"category\",\n", - " \"other_debtors\": \"category\",\n", - " \"present_residence_since\": \"numeric\",\n", - " \"property\": \"category\",\n", - " \"age\": \"category\",\n", - " \"other_installment_plans\": \"category\",\n", - " \"housing\": \"category\",\n", - " \"credits_this_bank\": \"numeric\",\n", - " \"job\": \"category\",\n", - " \"people_under_maintenance\": \"numeric\",\n", - " \"telephone\": \"category\",\n", - " \"foreign_worker\": \"category\",\n", - "}\n", - "\n", - "TARGET_COLUMN_NAME = \"default\"\n", - "\n", - "COLUMNS_TO_SCALE = [key for key in COLUMN_TYPES.keys() if COLUMN_TYPES[key] == \"numeric\"]\n", - "COLUMNS_TO_ENCODE = [key for key in COLUMN_TYPES.keys() if COLUMN_TYPES[key] == \"category\"]\n", - "\n", - "# Paths.\n", - "DATA_URL = \"https://raw.githubusercontent.com/Giskard-AI/giskard-examples/main/datasets/credit_scoring_classification_model_dataset/german_credit_prepared.csv\"" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:21.529881Z", - "start_time": "2023-08-21T12:06:21.514699Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Dataset preparation" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "### Load data" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 25, - "outputs": [], - "source": [ - "df = pd.read_csv(DATA_URL, keep_default_na=False, na_values=[\"_GSK_NA_\"])" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:22.406745Z", - "start_time": "2023-08-21T12:06:22.141476Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Train-test split" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 26, - "outputs": [], - "source": [ - "X_train, X_test, Y_train, Y_test = train_test_split(df.drop(columns=TARGET_COLUMN_NAME), df[TARGET_COLUMN_NAME],\n", - " test_size=0.2, random_state=0, stratify=df[TARGET_COLUMN_NAME])" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:22.588559Z", - "start_time": "2023-08-21T12:06:22.568863Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Wrap dataset with Giskard" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 27, - "outputs": [], - "source": [ - "raw_data = pd.concat([X_test, Y_test], axis=1)\n", - "wrapped_data = Dataset(\n", - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=TARGET_COLUMN_NAME, # Ground truth variable.\n", - " name='German credit scoring dataset', # Optional.\n", - " cat_columns=COLUMNS_TO_ENCODE # List of categorical columns. Optional, but is a MUST if available. 
Inferred automatically if not.\n", - ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:23.000841Z", - "start_time": "2023-08-21T12:06:22.975202Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Model training" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "### Define preprocessing steps" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 28, - "outputs": [], - "source": [ - "numeric_transformer = Pipeline(steps=[\n", - " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", - " (\"scaler\", StandardScaler())\n", - "])\n", - "\n", - "categorical_transformer = Pipeline([\n", - " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"missing\")),\n", - " (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\", sparse=False)),\n", - "])\n", - "\n", - "preprocessor = ColumnTransformer(transformers=[\n", - " (\"num\", numeric_transformer, COLUMNS_TO_SCALE),\n", - " (\"cat\", categorical_transformer, COLUMNS_TO_ENCODE),\n", - "])" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:23.767134Z", - "start_time": "2023-08-21T12:06:23.718945Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Build estimator" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 29, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " Default 0.68 0.45 0.54 60\n", - " Not default 0.79 0.91 0.85 140\n", - "\n", - " accuracy 0.77 200\n", - " macro avg 0.73 0.68 0.69 200\n", - "weighted avg 0.76 0.77 0.75 200\n" - ] + "cell_type": "markdown", + "source": [ + "# German credit scoring [sklearn]\n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a simple logistic regression model with the german credit scoring dataset. Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ], + "metadata": { + "collapsed": false + } }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mykytaalekseiev/Work/giskard_main/python-client/.venv/lib/python3.10/site-packages/sklearn/preprocessing/_encoders.py:868: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. 
`sparse_output` is ignored unless you leave `sparse` to its default value.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "pipeline = Pipeline(steps=[\n", - " (\"preprocessor\", preprocessor),\n", - " (\"classifier\", LogisticRegression(max_iter=100))\n", - "])\n", - "\n", - "pipeline.fit(X_train, Y_train)\n", - "\n", - "pred_train = pipeline.predict(X_train)\n", - "pred_test = pipeline.predict(X_test)\n", - "\n", - "print(classification_report(Y_test, pred_test))" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:24.530066Z", - "start_time": "2023-08-21T12:06:24.368164Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Wrap model with Giskard" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "wrapped_model = Model(\n", - " model=pipeline, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"Credit scoring classifier\", # Optional.\n", - " classification_labels=pipeline.classes_.tolist(), # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=list(COLUMN_TYPES.keys()), # Default: all columns of your dataset.\n", - " # classification_threshold=0.5 # Default: 0.5.\n", - ")\n", - "\n", - "# Validate wrapped model.\n", - "print(classification_report(Y_test, pipeline.classes_[wrapped_model.predict(wrapped_data).raw_prediction]))" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Scan your model to find vulnerabilities\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." 
- ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "results = giskard.scan(wrapped_model, wrapped_data)" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 32, - "outputs": [ + "cell_type": "markdown", + "source": [ + "## Install Giskard" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-21T12:06:20.563432Z", + "start_time": "2023-08-21T12:06:20.538551Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Import libraries" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2023-08-21T12:06:21.107499Z", + "start_time": "2023-08-21T12:06:21.091114Z" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n", + "\n", + "import giskard\n", + "from giskard import Model, Dataset, testing, GiskardClient" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Define constants" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [], + "source": [ + "# Constants.\n", + "COLUMN_TYPES = {\n", + " \"account_check_status\": \"category\",\n", + " \"duration_in_month\": \"numeric\",\n", + " \"credit_history\": \"category\",\n", + " \"purpose\": \"category\",\n", + " \"credit_amount\": \"numeric\",\n", + " \"savings\": \"category\",\n", + " \"present_employment_since\": \"category\",\n", + " \"installment_as_income_perc\": \"numeric\",\n", + " \"sex\": \"category\",\n", + " \"personal_status\": \"category\",\n", + " \"other_debtors\": \"category\",\n", + " \"present_residence_since\": \"numeric\",\n", + " \"property\": \"category\",\n", + " \"age\": \"category\",\n", + " \"other_installment_plans\": \"category\",\n", + " \"housing\": \"category\",\n", + " \"credits_this_bank\": \"numeric\",\n", + " \"job\": \"category\",\n", + " \"people_under_maintenance\": \"numeric\",\n", + " \"telephone\": \"category\",\n", + " \"foreign_worker\": \"category\",\n", + "}\n", + "\n", + "TARGET_COLUMN_NAME = \"default\"\n", + "\n", + "COLUMNS_TO_SCALE = [key for key in COLUMN_TYPES.keys() if COLUMN_TYPES[key] == \"numeric\"]\n", + "COLUMNS_TO_ENCODE = [key for key in COLUMN_TYPES.keys() if COLUMN_TYPES[key] == \"category\"]\n", + "\n", + "# Paths.\n", + "DATA_URL = \"https://raw.githubusercontent.com/Giskard-AI/giskard-examples/main/datasets/credit_scoring_classification_model_dataset/german_credit_prepared.csv\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-21T12:06:21.529881Z", + "start_time": "2023-08-21T12:06:21.514699Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Dataset preparation" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "### Load data" + ], + "metadata": { + 
"collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [], + "source": [ + "df = pd.read_csv(DATA_URL, keep_default_na=False, na_values=[\"_GSK_NA_\"])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-21T12:06:22.406745Z", + "start_time": "2023-08-21T12:06:22.141476Z" + } + } + }, { - "data": { - "text/html": "\n" - }, - "metadata": {}, - "output_type": "display_data" + "cell_type": "markdown", + "source": [ + "### Train-test split" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 26, + "outputs": [], + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(df.drop(columns=TARGET_COLUMN_NAME), df[TARGET_COLUMN_NAME],\n", + " test_size=0.2, random_state=0, stratify=df[TARGET_COLUMN_NAME])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-21T12:06:22.588559Z", + "start_time": "2023-08-21T12:06:22.568863Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Wrap dataset with Giskard" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 27, + "outputs": [], + "source": [ + "raw_data = pd.concat([X_test, Y_test], axis=1)\n", + "wrapped_data = Dataset(\n", + " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=TARGET_COLUMN_NAME, # Ground truth variable.\n", + " name='German credit scoring dataset', # Optional.\n", + " cat_columns=COLUMNS_TO_ENCODE # List of categorical columns. Optional, but is a MUST if available. Inferred automatically if not.\n", + ")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-21T12:06:23.000841Z", + "start_time": "2023-08-21T12:06:22.975202Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Model training" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "### Define preprocessing steps" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 28, + "outputs": [], + "source": [ + "numeric_transformer = Pipeline(steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", + " (\"scaler\", StandardScaler())\n", + "])\n", + "\n", + "categorical_transformer = Pipeline([\n", + " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"missing\")),\n", + " (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\", sparse=False)),\n", + "])\n", + "\n", + "preprocessor = ColumnTransformer(transformers=[\n", + " (\"num\", numeric_transformer, COLUMNS_TO_SCALE),\n", + " (\"cat\", categorical_transformer, COLUMNS_TO_ENCODE),\n", + "])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-21T12:06:23.767134Z", + "start_time": "2023-08-21T12:06:23.718945Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Build estimator" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 29, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " Default 0.68 0.45 0.54 60\n", + " Not default 0.79 0.91 0.85 140\n", + "\n", + " accuracy 0.77 200\n", + " macro avg 0.73 0.68 0.69 200\n", + "weighted avg 0.76 0.77 0.75 200\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/mykytaalekseiev/Work/giskard_main/python-client/.venv/lib/python3.10/site-packages/sklearn/preprocessing/_encoders.py:868: FutureWarning: `sparse` was renamed to `sparse_output` in version 1.2 and will be removed in 1.4. `sparse_output` is ignored unless you leave `sparse` to its default value.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "pipeline = Pipeline(steps=[\n", + " (\"preprocessor\", preprocessor),\n", + " (\"classifier\", LogisticRegression(max_iter=100))\n", + "])\n", + "\n", + "pipeline.fit(X_train, Y_train)\n", + "\n", + "pred_train = pipeline.predict(X_train)\n", + "pred_test = pipeline.predict(X_test)\n", + "\n", + "print(classification_report(Y_test, pred_test))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-21T12:06:24.530066Z", + "start_time": "2023-08-21T12:06:24.368164Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Wrap model with Giskard" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "wrapped_model = Model(\n", + " model=pipeline, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"Credit scoring classifier\", # Optional.\n", + " classification_labels=pipeline.classes_.tolist(), # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=list(COLUMN_TYPES.keys()), # Default: all columns of your dataset.\n", + " # classification_threshold=0.5 # Default: 0.5.\n", + ")\n", + "\n", + "# Validate wrapped model.\n", + "print(classification_report(Y_test, pipeline.classes_[wrapped_model.predict(wrapped_data).raw_prediction]))" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Scan your model to find vulnerabilities\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "results = giskard.scan(wrapped_model, wrapped_data)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 32, + "outputs": [ + { + "data": { + "text/html": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-21T12:06:48.403734Z", + "start_time": "2023-08-21T12:06:47.466032Z" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "* Compare models to decide which model to promote\n", + "* Debug your tests to diagnose the issues\n", + "* Create more domain-specific tests that are integrating business feedback\n", + "* Share your results" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } } - ], - "source": [ - "display(results)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:48.403734Z", - "start_time": "2023-08-21T12:06:47.466032Z" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [], - "metadata": { - "collapsed": false - } - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/drug_classification_sklearn.ipynb b/python-client/docs/reference/notebooks/drug_classification_sklearn.ipynb index a633618d40..b0dfbcc653 100644 --- a/python-client/docs/reference/notebooks/drug_classification_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/drug_classification_sklearn.ipynb @@ -1,1379 +1,1379 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "2a6fbb55a4cfd3e2", - "metadata": { - "collapsed": false - }, - "source": [ - "# Drug classification [sklearn]\n", - "* Multiclass classification of drug type, given person's health data.\n", - "* Reference notebook: \n", - "* Dataset: \n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a support vector classification model with the drug classification dataset. Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ] - }, - { - "cell_type": "markdown", - "id": "b79205f540a44a79", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "85a76ae027fad887", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:14:54.475924Z", - "start_time": "2023-08-21T12:14:54.438809Z" + "cells": [ + { + "cell_type": "markdown", + "id": "2a6fbb55a4cfd3e2", + "metadata": { + "collapsed": false + }, + "source": [ + "# Drug classification [sklearn]\n", + "* Multiclass classification of drug type, given person's health data.\n", + "* Reference notebook: \n", + "* Dataset: \n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a support vector classification model with the drug classification dataset. 
Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "markdown", - "id": "506c48d3fb95950c", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install additional libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "5e6e6072a4c43431", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:14:54.838001Z", - "start_time": "2023-08-21T12:14:54.826752Z" + { + "cell_type": "markdown", + "id": "b79205f540a44a79", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install Giskard" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "!pip install imblearn" - ] - }, - { - "cell_type": "markdown", - "id": "ddce5dbef1dd6b35", - "metadata": { - "collapsed": false - }, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "5fdae2be34577a32", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:14:55.240510Z", - "start_time": "2023-08-21T12:14:55.218702Z" + { + "cell_type": "code", + "execution_count": 14, + "id": "85a76ae027fad887", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:14:54.475924Z", + "start_time": "2023-08-21T12:14:54.438809Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "from imblearn.over_sampling import SMOTE\n", - "from imblearn.pipeline import Pipeline as PipelineImb\n", - "from sklearn.metrics import accuracy_score\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "from sklearn.svm import SVC\n", - "\n", - "import giskard\n", - "from giskard import Dataset, Model, GiskardClient, testing" - ] - }, - { - "cell_type": "markdown", - "id": "cce300266ae3efc1", - "metadata": { - "collapsed": false - }, - "source": [ - "## Define constants" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "d44430add2918aa1", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:14:55.637702Z", - "start_time": "2023-08-21T12:14:55.619404Z" + { + "cell_type": "markdown", + "id": "506c48d3fb95950c", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install additional libraries" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# Constants.\n", - "RANDOM_SEED = 0\n", - "\n", - "TARGET_NAME = \"Drug\"\n", - "\n", - "AGE_BINS = [0, 19, 29, 39, 49, 59, 69, 80]\n", - "AGE_CATEGORIES = ['<20s', '20s', '30s', '40s', '50s', '60s', '>60s']\n", - "\n", - "NA_TO_K_BINS = [0, 9, 19, 29, 50]\n", - "NA_TO_K_CATEGORIES = ['<10', '10-20', '20-30', '>30']\n", - "\n", - "# Paths.\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/drug_classification_dataset/drug200.csv\"\n", - 
"DATA_PATH = Path.home() / \".giskard\" / \"drug_classification_dataset\" / \"drug200.csv\"" - ] - }, - { - "cell_type": "markdown", - "id": "8470f75dc6f081ae", - "metadata": { - "collapsed": false - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "id": "7a7b9f4a405e531d", - "metadata": { - "collapsed": false - }, - "source": [ - "### Load and preprocess data" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "5a2fbb53dd96b195", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:14:56.248510Z", - "start_time": "2023-08-21T12:14:56.234545Z" + { + "cell_type": "code", + "execution_count": 15, + "id": "5e6e6072a4c43431", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:14:54.838001Z", + "start_time": "2023-08-21T12:14:54.826752Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "!pip install imblearn" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def fetch_from_ftp(url: str, file: Path) -> None:\n", - " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " print(f\"Downloading data from {url}\")\n", - " urlretrieve(url, file)\n", - "\n", - " print(f\"Data was loaded!\")\n", - "\n", - "\n", - "def load_data() -> pd.DataFrame:\n", - " \"\"\"Load data.\"\"\"\n", - " fetch_from_ftp(DATA_URL, DATA_PATH)\n", - " df = pd.read_csv(DATA_PATH)\n", - " return df\n", - "\n", - "\n", - "def bin_numerical(df: pd.DataFrame) -> np.ndarray:\n", - " \"\"\"Perform numerical features binning.\"\"\"\n", - "\n", - " def _bin_age(_df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Bin age feature.\"\"\"\n", - " _df.Age = pd.cut(_df.Age, bins=AGE_BINS, labels=AGE_CATEGORIES)\n", - " return _df\n", - "\n", - " def _bin_na_to_k(_df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Bin Na_to_K feature.\"\"\"\n", - " _df.Na_to_K = pd.cut(_df.Na_to_K, bins=NA_TO_K_BINS, labels=NA_TO_K_CATEGORIES)\n", - " return _df\n", - "\n", - " df = df.copy()\n", - " df = _bin_age(df)\n", - " df = _bin_na_to_k(df)\n", - "\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "a1887adb", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:14:56.527235Z", - "start_time": "2023-08-21T12:14:56.463650Z" + { + "cell_type": "markdown", + "id": "ddce5dbef1dd6b35", + "metadata": { + "collapsed": false + }, + "source": [ + "## Import libraries" + ] }, - "execution": { - "iopub.execute_input": "2022-05-04T02:53:16.032035Z", - "iopub.status.busy": "2022-05-04T02:53:16.030941Z", - "iopub.status.idle": "2022-05-04T02:53:16.050526Z", - "shell.execute_reply": "2022-05-04T02:53:16.051084Z", - "shell.execute_reply.started": "2022-03-08T01:25:03.464398Z" + { + "cell_type": "code", + "execution_count": 16, + "id": "5fdae2be34577a32", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:14:55.240510Z", + "start_time": "2023-08-21T12:14:55.218702Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from imblearn.over_sampling import SMOTE\n", + "from imblearn.pipeline import Pipeline as PipelineImb\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.svm 
import SVC\n", + "\n", + "import giskard\n", + "from giskard import Dataset, Model, GiskardClient, testing" + ] }, - "id": "MttOdogGf977", - "papermill": { - "duration": 0.081087, - "end_time": "2022-05-04T02:53:16.051265", - "exception": false, - "start_time": "2022-05-04T02:53:15.970178", - "status": "completed" + { + "cell_type": "markdown", + "id": "cce300266ae3efc1", + "metadata": { + "collapsed": false + }, + "source": [ + "## Define constants" + ] }, - "tags": [] - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data was loaded!\n" - ] - } - ], - "source": [ - "df_drug = load_data()\n", - "df_drug = bin_numerical(df_drug)" - ] - }, - { - "cell_type": "markdown", - "id": "4bcaebe9404ac3ac", - "metadata": { - "collapsed": false - }, - "source": [ - "### Train-test split" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "7c32c64979960c7d", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:14:56.919326Z", - "start_time": "2023-08-21T12:14:56.844334Z" + "cell_type": "code", + "execution_count": 17, + "id": "d44430add2918aa1", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:14:55.637702Z", + "start_time": "2023-08-21T12:14:55.619404Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# Constants.\n", + "RANDOM_SEED = 0\n", + "\n", + "TARGET_NAME = \"Drug\"\n", + "\n", + "AGE_BINS = [0, 19, 29, 39, 49, 59, 69, 80]\n", + "AGE_CATEGORIES = ['<20s', '20s', '30s', '40s', '50s', '60s', '>60s']\n", + "\n", + "NA_TO_K_BINS = [0, 9, 19, 29, 50]\n", + "NA_TO_K_CATEGORIES = ['<10', '10-20', '20-30', '>30']\n", + "\n", + "# Paths.\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/drug_classification_dataset/drug200.csv\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"drug_classification_dataset\" / \"drug200.csv\"" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "X_train, X_test, y_train, y_test = train_test_split(df_drug.drop(TARGET_NAME, axis=1), df_drug.Drug,\n", - " test_size=0.3, random_state=RANDOM_SEED)" - ] - }, - { - "cell_type": "markdown", - "id": "1e24a95451d322dc", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap dataset with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "3c6c6bea2652fe95", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:14:57.578019Z", - "start_time": "2023-08-21T12:14:57.497833Z" + { + "cell_type": "markdown", + "id": "8470f75dc6f081ae", + "metadata": { + "collapsed": false + }, + "source": [ + "## Dataset preparation" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "raw_dataset = pd.concat([X_train, y_train], axis=1)\n", - "wrapped_dataset = Dataset(\n", - " df=raw_dataset, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=TARGET_NAME, # Ground truth variable.\n", - " name=\"drug_classification_dataset\", # Optional.\n", - " cat_columns=X_test.columns.tolist() # List of categorical columns. Optional, but is a MUST if available. 
Inferred automatically if not.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "bacc2a6c", - "metadata": { - "id": "2D1sczYQneQZ", - "papermill": { - "duration": 0.072561, - "end_time": "2022-05-04T02:53:28.063558", - "exception": false, - "start_time": "2022-05-04T02:53:27.990997", - "status": "completed" + { + "cell_type": "markdown", + "id": "7a7b9f4a405e531d", + "metadata": { + "collapsed": false + }, + "source": [ + "### Load and preprocess data" + ] }, - "tags": [] - }, - "source": [ - "## Train model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2339133f089ed14d", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pipeline = PipelineImb(steps=[\n", - " (\"one_hot_encoder\", OneHotEncoder()),\n", - " (\"resampler\", SMOTE(random_state=RANDOM_SEED)),\n", - " (\"classifier\", SVC(kernel='linear', max_iter=250, random_state=RANDOM_SEED, probability=True))\n", - "])\n", - "\n", - "print(f\"Model training...\")\n", - "pipeline.fit(X_train, y_train)\n", - "print(f\"Model training finished!\")\n", - "\n", - "print(f\"Model testing...\")\n", - "y_train_pred = pipeline.predict(X_train)\n", - "y_test_pred = pipeline.predict(X_test)\n", - "train_metric = accuracy_score(y_train_pred, y_train)\n", - "test_metric = accuracy_score(y_test_pred, y_test)\n", - "print(f\"Train accuracy score: {train_metric:.2f}\\n\"\n", - " f\"Test accuracy score: {test_metric:.2f}\")" - ] - }, - { - "cell_type": "markdown", - "id": "619ac5f0aaefbdbd", - "metadata": { - "collapsed": false - }, - "source": [ - "### Define prediction function" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "3530c0cd1dfdb81", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:15:00.352881Z", - "start_time": "2023-08-21T12:15:00.317287Z" + { + "cell_type": "code", + "execution_count": 18, + "id": "5a2fbb53dd96b195", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:14:56.248510Z", + "start_time": "2023-08-21T12:14:56.234545Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", + " if not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " print(f\"Downloading data from {url}\")\n", + " urlretrieve(url, file)\n", + "\n", + " print(f\"Data was loaded!\")\n", + "\n", + "\n", + "def load_data() -> pd.DataFrame:\n", + " \"\"\"Load data.\"\"\"\n", + " fetch_from_ftp(DATA_URL, DATA_PATH)\n", + " df = pd.read_csv(DATA_PATH)\n", + " return df\n", + "\n", + "\n", + "def bin_numerical(df: pd.DataFrame) -> np.ndarray:\n", + " \"\"\"Perform numerical features binning.\"\"\"\n", + "\n", + " def _bin_age(_df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"Bin age feature.\"\"\"\n", + " _df.Age = pd.cut(_df.Age, bins=AGE_BINS, labels=AGE_CATEGORIES)\n", + " return _df\n", + "\n", + " def _bin_na_to_k(_df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"Bin Na_to_K feature.\"\"\"\n", + " _df.Na_to_K = pd.cut(_df.Na_to_K, bins=NA_TO_K_BINS, labels=NA_TO_K_CATEGORIES)\n", + " return _df\n", + "\n", + " df = df.copy()\n", + " df = _bin_age(df)\n", + " df = _bin_na_to_k(df)\n", + "\n", + " return df" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def prediction_function(df: pd.DataFrame) -> np.ndarray:\n", - " return pipeline.predict_proba(df)" - ] - }, - { - "cell_type": "markdown", - "id": "9afe761ea34d1d34", - "metadata": { - 
"collapsed": false - }, - "source": [ - "### Wrap model with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c274f07321e762f9", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "wrapped_model = Model(\n", - " model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"drug_classifier\", # Optional.\n", - " classification_labels=pipeline.classes_, # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=X_train.columns.tolist() # Default: all columns of your dataset.\n", - ")\n", - "\n", - "# Validate wrapped model.\n", - "wrapped_y_train_pred = pipeline.classes_[wrapped_model.predict(wrapped_dataset).raw_prediction]\n", - "wrapped_train_metric = accuracy_score(wrapped_y_train_pred, y_train)\n", - "print(f\"Wrapped Train accuracy score: {wrapped_train_metric:.2f}\")" - ] - }, - { - "cell_type": "markdown", - "id": "6efff9b653fdfb6c", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan your model to find vulnerabilities\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1125a876a29f98f4", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "results = giskard.scan(wrapped_model, wrapped_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "82db2acb6c2ae6dd", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T12:15:08.860866Z", - "start_time": "2023-08-21T12:15:08.688264Z" + { + "cell_type": "code", + "execution_count": 19, + "id": "a1887adb", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:14:56.527235Z", + "start_time": "2023-08-21T12:14:56.463650Z" + }, + "execution": { + "iopub.execute_input": "2022-05-04T02:53:16.032035Z", + "iopub.status.busy": "2022-05-04T02:53:16.030941Z", + "iopub.status.idle": "2022-05-04T02:53:16.050526Z", + "shell.execute_reply": "2022-05-04T02:53:16.051084Z", + "shell.execute_reply.started": "2022-03-08T01:25:03.464398Z" + }, + "id": "MttOdogGf977", + "papermill": { + "duration": 0.081087, + "end_time": "2022-05-04T02:53:16.051265", + "exception": false, + "start_time": "2022-05-04T02:53:15.970178", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data was loaded!\n" + ] + } + ], + "source": [ + "df_drug = load_data()\n", + "df_drug = bin_numerical(df_drug)" + ] + }, + { + "cell_type": "markdown", + "id": "4bcaebe9404ac3ac", + "metadata": { + "collapsed": false + }, + "source": [ + "### Train-test split" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7c32c64979960c7d", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:14:56.919326Z", + "start_time": "2023-08-21T12:14:56.844334Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(df_drug.drop(TARGET_NAME, axis=1), df_drug.Drug,\n", + " test_size=0.3, random_state=RANDOM_SEED)" + ] + }, + { + "cell_type": "markdown", + "id": 
"1e24a95451d322dc", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap dataset with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "3c6c6bea2652fe95", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:14:57.578019Z", + "start_time": "2023-08-21T12:14:57.497833Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "raw_dataset = pd.concat([X_train, y_train], axis=1)\n", + "wrapped_dataset = Dataset(\n", + " df=raw_dataset, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=TARGET_NAME, # Ground truth variable.\n", + " name=\"drug_classification_dataset\", # Optional.\n", + " cat_columns=X_test.columns.tolist() # List of categorical columns. Optional, but is a MUST if available. Inferred automatically if not.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "bacc2a6c", + "metadata": { + "id": "2D1sczYQneQZ", + "papermill": { + "duration": 0.072561, + "end_time": "2022-05-04T02:53:28.063558", + "exception": false, + "start_time": "2022-05-04T02:53:27.990997", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Train model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2339133f089ed14d", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pipeline = PipelineImb(steps=[\n", + " (\"one_hot_encoder\", OneHotEncoder()),\n", + " (\"resampler\", SMOTE(random_state=RANDOM_SEED)),\n", + " (\"classifier\", SVC(kernel='linear', max_iter=250, random_state=RANDOM_SEED, probability=True))\n", + "])\n", + "\n", + "print(f\"Model training...\")\n", + "pipeline.fit(X_train, y_train)\n", + "print(f\"Model training finished!\")\n", + "\n", + "print(f\"Model testing...\")\n", + "y_train_pred = pipeline.predict(X_train)\n", + "y_test_pred = pipeline.predict(X_test)\n", + "train_metric = accuracy_score(y_train_pred, y_train)\n", + "test_metric = accuracy_score(y_test_pred, y_test)\n", + "print(f\"Train accuracy score: {train_metric:.2f}\\n\"\n", + " f\"Test accuracy score: {test_metric:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "619ac5f0aaefbdbd", + "metadata": { + "collapsed": false + }, + "source": [ + "### Define prediction function" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "3530c0cd1dfdb81", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:15:00.352881Z", + "start_time": "2023-08-21T12:15:00.317287Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def prediction_function(df: pd.DataFrame) -> np.ndarray:\n", + " return pipeline.predict_proba(df)" + ] + }, + { + "cell_type": "markdown", + "id": "9afe761ea34d1d34", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap model with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c274f07321e762f9", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "wrapped_model = Model(\n", + " model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"drug_classifier\", # Optional.\n", + " classification_labels=pipeline.classes_, # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=X_train.columns.tolist() # Default: all columns of your 
dataset.\n", + ")\n", + "\n", + "# Validate wrapped model.\n", + "wrapped_y_train_pred = pipeline.classes_[wrapped_model.predict(wrapped_dataset).raw_prediction]\n", + "wrapped_train_metric = accuracy_score(wrapped_y_train_pred, y_train)\n", + "print(f\"Wrapped Train accuracy score: {wrapped_train_metric:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "6efff9b653fdfb6c", + "metadata": { + "collapsed": false + }, + "source": [ + "## Scan your model to find vulnerabilities\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1125a876a29f98f4", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "results = giskard.scan(wrapped_model, wrapped_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "82db2acb6c2ae6dd", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:15:08.860866Z", + "start_time": "2023-08-21T12:15:08.688264Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ] }, - "collapsed": false - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "markdown", + "id": "47f81237eacb305a", + "metadata": { + "collapsed": false + }, + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f44d26a78bda617e", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "id": "95c7d1249aa97260", + "metadata": { + "collapsed": false + }, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "295ef8575d0f0b19", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_dataset, threshold=0.7)).run()" + ] + }, + { + "cell_type": "markdown", + "id": "f2c270bda4037820", + "metadata": { + "collapsed": false + }, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "* Compare models to decide which model to promote\n", + "* Debug your tests to diagnose the issues\n", + "* Create more domain-specific tests that are integrating business feedback\n", + "* Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95186436fe201810", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "id": "5e6a3c1e12f8cedd", + "metadata": { + "collapsed": false + }, + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "id": "47f81237eacb305a", - "metadata": { - "collapsed": false - }, - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f44d26a78bda617e", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "id": "95c7d1249aa97260", - "metadata": { - "collapsed": false - }, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "295ef8575d0f0b19", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_dataset, threshold=0.7)).run()" - ] - }, - { - "cell_type": "markdown", - "id": "f2c270bda4037820", - "metadata": { - "collapsed": false - }, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "95186436fe201810", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "id": "5e6a3c1e12f8cedd", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.10" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "papermill": { + "default_parameters": {}, + "duration": 123.972965, + "end_time": "2022-05-04T02:55:10.135212", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2022-05-04T02:53:06.162247", + "version": "2.3.3" + } }, - "papermill": { - "default_parameters": {}, - "duration": 123.972965, - "end_time": "2022-05-04T02:55:10.135212", - "environment_variables": {}, - "exception": null, - "input_path": "__notebook__.ipynb", - "output_path": "__notebook__.ipynb", - "parameters": {}, - "start_time": "2022-05-04T02:53:06.162247", - "version": "2.3.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/enron_email_classification_sklearn.ipynb b/python-client/docs/reference/notebooks/enron_email_classification_sklearn.ipynb index 4a24d2dddb..f4d3fedb9d 100644 --- a/python-client/docs/reference/notebooks/enron_email_classification_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/enron_email_classification_sklearn.ipynb @@ -1,570 +1,570 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "990eccb8", - "metadata": {}, - "source": [ - "# ENRON email classification [sklearn]\n", - "\n", - "
\n", - "What is Giskard ?\n", - "\n", - "Giskard is an open-source testing framework dedicated to ML models, ranging from tabular to LLM. [To know more about Giskard, click here](https://docs.giskard.ai/en/latest/getting-started/index.html).\n", - "
" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "40f23d1a", - "metadata": {}, - "source": [ - "By running this notebook, you'll create a whole test suite in a few lines of code. The model used here is a scikit-learn classification model. It is used to predict categories of emails in the ENRON dataset.\n", - "\n", - "You'll learn how to:\n", - "\n", - "- Detect vulnerabilities by scanning the model\n", - "\n", - "- Generate a test suite with domain-specific tests\n", - "\n", - "- Customize your test suite by loading a test from the Giskard catalog\n", - "\n", - "- Upload your model to the Giskard server to:\n", - "\n", - " - Compare models to decide which one to promote\n", - "\n", - " - Debug your tests to diagnose issues\n", - "\n", - " - Share your results and collect business feedback from your team\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "da9a6eac", - "metadata": {}, - "source": [ - "## Install Giskard\n", - "\n", - "To see the list of Python requirements, please refer to [the documentation](https://docs.giskard.ai/en/latest/guides/installation_library/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ], - "id": "133e4cf63ec27b24" - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "58613d7d", - "metadata": {}, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d960163", - "metadata": {}, - "outputs": [], - "source": [ - "import email\n", - "import glob\n", - "from collections import defaultdict\n", - "from string import punctuation\n", - "\n", - "import nltk\n", - "import pandas as pd\n", - "from dateutil import parser\n", - "from nltk.corpus import stopwords\n", - "from nltk.stem import PorterStemmer\n", - "from sklearn import model_selection\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.feature_extraction.text import CountVectorizer\n", - "from sklearn.feature_extraction.text import TfidfTransformer\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.pipeline import Pipeline\n", - "\n", - "import giskard" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d7cb9261", - "metadata": {}, - "source": [ - "## Import data and load it into Giskard" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "dfa3456f", - "metadata": {}, - "source": [ - "### Import data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d808e73", - "metadata": {}, - "outputs": [], - "source": [ - "!wget http://bailando.sims.berkeley.edu/enron/enron_with_categories.tar.gz\n", - "!tar zxf enron_with_categories.tar.gz\n", - "!rm enron_with_categories.tar.gz" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "750f63f6", - "metadata": {}, - "source": [ - "### Pre-process and filter data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e3c3e6a5", - "metadata": {}, - "outputs": [], - "source": [ - "nltk.download('punkt')\n", - "nltk.download('stopwords')\n", - "\n", - "stoplist = list(set(stopwords.words('english') + list(punctuation)))\n", - "stemmer = PorterStemmer()\n", - "\n", - "# http://bailando.sims.berkeley.edu/enron/enron_categories.txt\n", - "idx_to_cat = {\n", - " 1: 'REGULATION',\n", - " 2: 'INTERNAL',\n", - " 3: 'INFLUENCE',\n", - " 4: 'INFLUENCE',\n", - " 5: 'INFLUENCE',\n", - " 6: 
'CALIFORNIA CRISIS',\n", - " 7: 'INTERNAL',\n", - " 8: 'INTERNAL',\n", - " 9: 'INFLUENCE',\n", - " 10: 'REGULATION',\n", - " 11: 'talking points',\n", - " 12: 'meeting minutes',\n", - " 13: 'trip reports'}\n", - "\n", - "idx_to_cat2 = {\n", - " 1: 'regulations and regulators (includes price caps)',\n", - " 2: 'internal projects -- progress and strategy',\n", - " 3: ' company image -- current',\n", - " 4: 'company image -- changing / influencing',\n", - " 5: 'political influence / contributions / contacts',\n", - " 6: 'california energy crisis / california politics',\n", - " 7: 'internal company policy',\n", - " 8: 'internal company operations',\n", - " 9: 'alliances / partnerships',\n", - " 10: 'legal advice',\n", - " 11: 'talking points',\n", - " 12: 'meeting minutes',\n", - " 13: 'trip reports'}\n", - "\n", - "LABEL_CAT = 3 # we'll be using the 2nd-level category \"Primary topics\" because the two first levels provide categories that are not mutually exclusive. see : https://bailando.berkeley.edu/enron/enron_categories.txt\n", - "\n", - "\n", - "#get_labels returns a dictionary representation of these labels.\n", - "def get_labels(filename):\n", - " with open(filename + '.cats') as f:\n", - " labels = defaultdict(dict)\n", - " line = f.readline()\n", - " while line:\n", - " line = line.split(',')\n", - " top_cat, sub_cat, freq = int(line[0]), int(line[1]), int(line[2])\n", - " labels[top_cat][sub_cat] = freq\n", - " line = f.readline()\n", - " return dict(labels)\n", - "\n", - "\n", - "email_files = [f.replace('.cats', '') for f in glob.glob('enron_with_categories/*/*.cats')]\n", - "\n", - "columns_name = ['Target', 'Subject', 'Content', 'Week_day', 'Year', 'Month', 'Hour', 'Nb_of_forwarded_msg']\n", - "\n", - "data = pd.DataFrame(columns=columns_name)\n", - "\n", - "for email_file in email_files:\n", - " values_to_add = {}\n", - "\n", - " #Target is the sub-category with maximum frequency\n", - " if LABEL_CAT in get_labels(email_file):\n", - " sub_cat_dict = get_labels(email_file)[LABEL_CAT]\n", - " target_int = max(sub_cat_dict, key=sub_cat_dict.get)\n", - " values_to_add['Target'] = str(idx_to_cat[target_int])\n", - "\n", - " #Features are metadata from the email object\n", - " filename = email_file + '.txt'\n", - " with open(filename) as f:\n", - "\n", - " message = email.message_from_string(f.read())\n", - "\n", - " values_to_add['Subject'] = str(message['Subject'])\n", - " values_to_add['Content'] = str(message.get_payload())\n", - "\n", - " date_time_obj = parser.parse(message['Date'])\n", - " values_to_add['Week_day'] = date_time_obj.strftime(\"%A\")\n", - " values_to_add['Year'] = date_time_obj.strftime(\"%Y\")\n", - " values_to_add['Month'] = date_time_obj.strftime(\"%B\")\n", - " values_to_add['Hour'] = int(date_time_obj.strftime(\"%H\"))\n", - "\n", - " # Count number of forwarded mails\n", - " number_of_messages = 0\n", - " for line in message.get_payload().split('\\n'):\n", - " if ('forwarded' in line.lower() or 'original' in line.lower()) and '--' in line:\n", - " number_of_messages += 1\n", - " values_to_add['Nb_of_forwarded_msg'] = number_of_messages\n", - "\n", - " row_to_add = pd.Series(values_to_add)\n", - " data = data.append(row_to_add, ignore_index=True)\n", - "\n", - "#We filter 879 rows (if Primary topics exists (i.e. 
if coarse genre 1.1 is selected) )\n", - "data_filtered = data[data[\"Target\"].notnull()]\n", - "\n", - "#Exclude target category with very few rows ; 812 rows remains\n", - "excluded_category = [idx_to_cat[i] for i in [11, 12, 13]]\n", - "data_filtered = data_filtered[data_filtered[\"Target\"].isin(excluded_category) == False]\n", - "num_classes = len(data_filtered[\"Target\"].value_counts())\n", - "\n", - "# Keep only the email column and the target\n", - "data_filtered = data_filtered[[\"Content\", \"Target\"]]" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f4e44405", - "metadata": {}, - "source": [ - "### Wrap your dataset into Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "b4adbbbd", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:14:14.789793Z", - "start_time": "2023-08-21T13:14:14.702493Z" - } - }, - "outputs": [], - "source": [ - "column_types = {\"Content\": \"text\"}\n", - "\n", - "dataset = giskard.Dataset(\n", - " df=data_filtered, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=\"Target\", # Ground truth variable.\n", - " name=\"Email classifier\" # Optional.\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "803e36d9", - "metadata": {}, - "source": [ - "## Create your model & wrap it into Giskard" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "5b964cd7", - "metadata": {}, - "source": [ - "### Train your model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd6c0986", - "metadata": {}, - "outputs": [], - "source": [ - "# Train/test split\n", - "feature_types = {i: column_types[i] for i in column_types if i != \"Target\"}\n", - "Y = data_filtered[\"Target\"]\n", - "X = data_filtered.drop(columns=[\"Target\"])[list(feature_types.keys())]\n", - "X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=0.20, random_state=30, stratify=Y)\n", - "\n", - "# Train model\n", - "# feature_types is used to declare the features the model is trained on\n", - "feature_types = {i: column_types[i] for i in column_types if i != 'Target'}\n", - "\n", - "# Pipeline for text transformer\n", - "text_transformer = Pipeline([\n", - " ('vect', CountVectorizer(stop_words=stoplist)),\n", - " ('tfidf', TfidfTransformer())\n", - "])\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('text_Mail', text_transformer, \"Content\")\n", - " ]\n", - ")\n", - "\n", - "# Pipeline for the model Logistic Regression\n", - "clf = Pipeline(steps=[('preprocessor', preprocessor),\n", - " ('classifier', LogisticRegression(max_iter=1000))])\n", - "\n", - "# Fit and score your model\n", - "clf.fit(X_train, Y_train)\n", - "print(\"Global model score: %.3f\" % clf.score(X_test, Y_test))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a005a413", - "metadata": {}, - "source": [ - "### Wrap your model in Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "db6c8ef8", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:17:22.162139Z", - "start_time": "2023-08-21T13:17:22.094197Z" - } - }, - "outputs": [], - "source": [ - "model = giskard.Model(\n", - " model=clf, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or 
text_generation.\n", - " name=\"enron_email_classification\", # Optional.\n", - " classification_labels=clf.classes_.tolist(), # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=column_types.keys(), # Default: all columns of your dataset.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan your model to find vulnerabilities\n", - "\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." - ], - "id": "d554fd54dda4d2d6" - }, - { - "cell_type": "code", - "execution_count": null, - "id": "931b100b", - "metadata": {}, - "outputs": [], - "source": [ - "results = giskard.scan(model, dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "ecb49fa5", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:11:14.068128Z", - "start_time": "2023-08-21T13:11:13.646182Z" - } - }, - "outputs": [ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "990eccb8", + "metadata": {}, + "source": [ + "# ENRON email classification [sklearn]\n", + "\n", + "
\n", + "What is Giskard ?\n", + "\n", + "Giskard is an open-source testing framework dedicated to ML models, ranging from tabular to LLM. [To know more about Giskard, click here](https://docs.giskard.ai/en/latest/getting-started/index.html).\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "40f23d1a", + "metadata": {}, + "source": [ + "By running this notebook, you'll create a whole test suite in a few lines of code. The model used here is a scikit-learn classification model. It is used to predict categories of emails in the ENRON dataset.\n", + "\n", + "You'll learn how to:\n", + "\n", + "- Detect vulnerabilities by scanning the model\n", + "\n", + "- Generate a test suite with domain-specific tests\n", + "\n", + "- Customize your test suite by loading a test from the Giskard catalog\n", + "\n", + "- Upload your model to the Giskard server to:\n", + "\n", + " - Compare models to decide which one to promote\n", + "\n", + " - Debug your tests to diagnose issues\n", + "\n", + " - Share your results and collect business feedback from your team\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "da9a6eac", + "metadata": {}, + "source": [ + "## Install Giskard\n", + "\n", + "To see the list of Python requirements, please refer to [the documentation](https://docs.giskard.ai/en/latest/guides/installation_library/index.html)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ], + "id": "133e4cf63ec27b24" + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "58613d7d", + "metadata": {}, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d960163", + "metadata": {}, + "outputs": [], + "source": [ + "import email\n", + "import glob\n", + "from collections import defaultdict\n", + "from string import punctuation\n", + "\n", + "import nltk\n", + "import pandas as pd\n", + "from dateutil import parser\n", + "from nltk.corpus import stopwords\n", + "from nltk.stem import PorterStemmer\n", + "from sklearn import model_selection\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "from sklearn.feature_extraction.text import TfidfTransformer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.pipeline import Pipeline\n", + "\n", + "import giskard" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d7cb9261", + "metadata": {}, + "source": [ + "## Import data and load it into Giskard" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "dfa3456f", + "metadata": {}, + "source": [ + "### Import data" + ] + }, { - "data": { - "text/html": "\n" - }, - "metadata": {}, - "output_type": "display_data" + "cell_type": "code", + "execution_count": null, + "id": "8d808e73", + "metadata": {}, + "outputs": [], + "source": [ + "!wget http://bailando.sims.berkeley.edu/enron/enron_with_categories.tar.gz\n", + "!tar zxf enron_with_categories.tar.gz\n", + "!rm enron_with_categories.tar.gz" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "750f63f6", + "metadata": {}, + "source": [ + "### Pre-process and filter data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3c3e6a5", + "metadata": {}, + "outputs": [], + "source": [ + "nltk.download('punkt')\n", + "nltk.download('stopwords')\n", + "\n", + "stoplist = list(set(stopwords.words('english') + list(punctuation)))\n", + "stemmer = PorterStemmer()\n", + "\n", + "# http://bailando.sims.berkeley.edu/enron/enron_categories.txt\n", + "idx_to_cat = {\n", + " 1: 'REGULATION',\n", + " 2: 'INTERNAL',\n", + " 
3: 'INFLUENCE',\n", + " 4: 'INFLUENCE',\n", + " 5: 'INFLUENCE',\n", + " 6: 'CALIFORNIA CRISIS',\n", + " 7: 'INTERNAL',\n", + " 8: 'INTERNAL',\n", + " 9: 'INFLUENCE',\n", + " 10: 'REGULATION',\n", + " 11: 'talking points',\n", + " 12: 'meeting minutes',\n", + " 13: 'trip reports'}\n", + "\n", + "idx_to_cat2 = {\n", + " 1: 'regulations and regulators (includes price caps)',\n", + " 2: 'internal projects -- progress and strategy',\n", + " 3: ' company image -- current',\n", + " 4: 'company image -- changing / influencing',\n", + " 5: 'political influence / contributions / contacts',\n", + " 6: 'california energy crisis / california politics',\n", + " 7: 'internal company policy',\n", + " 8: 'internal company operations',\n", + " 9: 'alliances / partnerships',\n", + " 10: 'legal advice',\n", + " 11: 'talking points',\n", + " 12: 'meeting minutes',\n", + " 13: 'trip reports'}\n", + "\n", + "LABEL_CAT = 3 # we'll be using the 2nd-level category \"Primary topics\" because the two first levels provide categories that are not mutually exclusive. see : https://bailando.berkeley.edu/enron/enron_categories.txt\n", + "\n", + "\n", + "#get_labels returns a dictionary representation of these labels.\n", + "def get_labels(filename):\n", + " with open(filename + '.cats') as f:\n", + " labels = defaultdict(dict)\n", + " line = f.readline()\n", + " while line:\n", + " line = line.split(',')\n", + " top_cat, sub_cat, freq = int(line[0]), int(line[1]), int(line[2])\n", + " labels[top_cat][sub_cat] = freq\n", + " line = f.readline()\n", + " return dict(labels)\n", + "\n", + "\n", + "email_files = [f.replace('.cats', '') for f in glob.glob('enron_with_categories/*/*.cats')]\n", + "\n", + "columns_name = ['Target', 'Subject', 'Content', 'Week_day', 'Year', 'Month', 'Hour', 'Nb_of_forwarded_msg']\n", + "\n", + "data = pd.DataFrame(columns=columns_name)\n", + "\n", + "for email_file in email_files:\n", + " values_to_add = {}\n", + "\n", + " #Target is the sub-category with maximum frequency\n", + " if LABEL_CAT in get_labels(email_file):\n", + " sub_cat_dict = get_labels(email_file)[LABEL_CAT]\n", + " target_int = max(sub_cat_dict, key=sub_cat_dict.get)\n", + " values_to_add['Target'] = str(idx_to_cat[target_int])\n", + "\n", + " #Features are metadata from the email object\n", + " filename = email_file + '.txt'\n", + " with open(filename) as f:\n", + "\n", + " message = email.message_from_string(f.read())\n", + "\n", + " values_to_add['Subject'] = str(message['Subject'])\n", + " values_to_add['Content'] = str(message.get_payload())\n", + "\n", + " date_time_obj = parser.parse(message['Date'])\n", + " values_to_add['Week_day'] = date_time_obj.strftime(\"%A\")\n", + " values_to_add['Year'] = date_time_obj.strftime(\"%Y\")\n", + " values_to_add['Month'] = date_time_obj.strftime(\"%B\")\n", + " values_to_add['Hour'] = int(date_time_obj.strftime(\"%H\"))\n", + "\n", + " # Count number of forwarded mails\n", + " number_of_messages = 0\n", + " for line in message.get_payload().split('\\n'):\n", + " if ('forwarded' in line.lower() or 'original' in line.lower()) and '--' in line:\n", + " number_of_messages += 1\n", + " values_to_add['Nb_of_forwarded_msg'] = number_of_messages\n", + "\n", + " row_to_add = pd.Series(values_to_add)\n", + " data = data.append(row_to_add, ignore_index=True)\n", + "\n", + "#We filter 879 rows (if Primary topics exists (i.e. 
if coarse genre 1.1 is selected) )\n", + "data_filtered = data[data[\"Target\"].notnull()]\n", + "\n", + "#Exclude target category with very few rows ; 812 rows remains\n", + "excluded_category = [idx_to_cat[i] for i in [11, 12, 13]]\n", + "data_filtered = data_filtered[data_filtered[\"Target\"].isin(excluded_category) == False]\n", + "num_classes = len(data_filtered[\"Target\"].value_counts())\n", + "\n", + "# Keep only the email column and the target\n", + "data_filtered = data_filtered[[\"Content\", \"Target\"]]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f4e44405", + "metadata": {}, + "source": [ + "### Wrap your dataset into Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "b4adbbbd", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:14:14.789793Z", + "start_time": "2023-08-21T13:14:14.702493Z" + } + }, + "outputs": [], + "source": [ + "column_types = {\"Content\": \"text\"}\n", + "\n", + "dataset = giskard.Dataset(\n", + " df=data_filtered, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=\"Target\", # Ground truth variable.\n", + " name=\"Email classifier\" # Optional.\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "803e36d9", + "metadata": {}, + "source": [ + "## Create your model & wrap it into Giskard" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "5b964cd7", + "metadata": {}, + "source": [ + "### Train your model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd6c0986", + "metadata": {}, + "outputs": [], + "source": [ + "# Train/test split\n", + "feature_types = {i: column_types[i] for i in column_types if i != \"Target\"}\n", + "Y = data_filtered[\"Target\"]\n", + "X = data_filtered.drop(columns=[\"Target\"])[list(feature_types.keys())]\n", + "X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=0.20, random_state=30, stratify=Y)\n", + "\n", + "# Train model\n", + "# feature_types is used to declare the features the model is trained on\n", + "feature_types = {i: column_types[i] for i in column_types if i != 'Target'}\n", + "\n", + "# Pipeline for text transformer\n", + "text_transformer = Pipeline([\n", + " ('vect', CountVectorizer(stop_words=stoplist)),\n", + " ('tfidf', TfidfTransformer())\n", + "])\n", + "preprocessor = ColumnTransformer(\n", + " transformers=[\n", + " ('text_Mail', text_transformer, \"Content\")\n", + " ]\n", + ")\n", + "\n", + "# Pipeline for the model Logistic Regression\n", + "clf = Pipeline(steps=[('preprocessor', preprocessor),\n", + " ('classifier', LogisticRegression(max_iter=1000))])\n", + "\n", + "# Fit and score your model\n", + "clf.fit(X_train, Y_train)\n", + "print(\"Global model score: %.3f\" % clf.score(X_test, Y_test))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a005a413", + "metadata": {}, + "source": [ + "### Wrap your model in Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "db6c8ef8", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:17:22.162139Z", + "start_time": "2023-08-21T13:17:22.094197Z" + } + }, + "outputs": [], + "source": [ + "model = giskard.Model(\n", + " model=clf, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or 
text_generation.\n", + " name=\"enron_email_classification\", # Optional.\n", + " classification_labels=clf.classes_.tolist(), # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=column_types.keys(), # Default: all columns of your dataset.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Scan your model to find vulnerabilities\n", + "\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." + ], + "id": "d554fd54dda4d2d6" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "931b100b", + "metadata": {}, + "outputs": [], + "source": [ + "results = giskard.scan(model, dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ecb49fa5", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:11:14.068128Z", + "start_time": "2023-08-21T13:11:13.646182Z" + } + }, + "outputs": [ + { + "data": { + "text/html": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results) # in your notebook" + ] + }, + { + "cell_type": "markdown", + "id": "8b3343fa", + "metadata": {}, + "source": [ + "As you see above, the model may detect various vulnerabilites by displaying:\n", + "\n", + "* Data slices showing unperformance, underconfidence, overconfidence or spurious correlations\n", + "* Data transformations creating robutness or ethical issues\n", + "* Examples making some tests fail\n" + ] + }, + { + "cell_type": "markdown", + "id": "0667bdc9", + "metadata": {}, + "source": [ + "## Generate a test suite from the Scan\n", + "\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the [Test your ML Model](https://docs.giskard.ai/en/latest/guides/test-suite/index.html) page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bea736a9", + "metadata": {}, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "\n", + "# You can run the test suite locally to verify that it reproduces the issues\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "id": "882f4638", + "metadata": {}, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog" + ] + }, + { + "cell_type": "markdown", + "id": "502a0767", + "metadata": {}, + "source": [ + "The Giskard open source catalog will enable to load:\n", + "\n", + "* **Tests** such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* **Slicing functions** such as detectors of toxicity, hate, emotion, etc\n", + "* **Transformation functions** such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test ([test_right_label](https://docs.giskard.ai/en/latest/reference/tests/statistic.html#giskard.testing.test_right_label)) that checks if a given row (the first example) has the right label. For more examples of tests and functions, refer to the [Giskard catalog](https://docs.giskard.ai/en/latest/guides/catalog/index.html)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0c3a076", + "metadata": {}, + "outputs": [], + "source": [ + "# For the test_right_label test we are adding, all the parameters are specified except model\n", + "# This means that we will need to specify model everytime we run the suite: model is a global parameter of the suite\n", + "suite = test_suite \\\n", + " .add_test(\n", + " giskard.testing.test_right_label(dataset=dataset.iloc[[1]], classification_label=\"yes\", threshold=1)).run()" + ] + }, + { + "cell_type": "markdown", + "id": "cf824254", + "metadata": {}, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "
\n", + "Install Giskard Server\n", + "\n", + "To upload your suite to the Giskard Server you must first run the Giskard Server. Refer to the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html).\n", + "
\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "- Compare models to decide which model to promote\n", + "- Debug your tests to diagnose the issues\n", + "- Create more domain-specific tests that are integrating business feedback\n", + "- Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8efd6bf3", + "metadata": {}, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "client = giskard.GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" + ], + "metadata": { + "collapsed": false + }, + "id": "7f594b5a762b09" + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" } - ], - "source": [ - "display(results) # in your notebook" - ] - }, - { - "cell_type": "markdown", - "id": "8b3343fa", - "metadata": {}, - "source": [ - "As you see above, the model may detect various vulnerabilites by displaying:\n", - "\n", - "* Data slices showing unperformance, underconfidence, overconfidence or spurious correlations\n", - "* Data transformations creating robutness or ethical issues\n", - "* Examples making some tests fail\n" - ] - }, - { - "cell_type": "markdown", - "id": "0667bdc9", - "metadata": {}, - "source": [ - "## Generate a test suite from the Scan\n", - "\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the [Test your ML Model](https://docs.giskard.ai/en/latest/guides/test-suite/index.html) page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bea736a9", - "metadata": {}, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "\n", - "# You can run the test suite locally to verify that it reproduces the issues\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "id": "882f4638", - "metadata": {}, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog" - ] - }, - { - "cell_type": "markdown", - "id": "502a0767", - "metadata": {}, - "source": [ - "The Giskard open source catalog will enable to load:\n", - "\n", - "* **Tests** such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* **Slicing functions** such as detectors of toxicity, hate, emotion, etc\n", - "* **Transformation functions** such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test ([test_right_label](https://docs.giskard.ai/en/latest/reference/tests/statistic.html#giskard.testing.test_right_label)) that checks if a given row (the first example) has the right label. For more examples of tests and functions, refer to the [Giskard catalog](https://docs.giskard.ai/en/latest/guides/catalog/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b0c3a076", - "metadata": {}, - "outputs": [], - "source": [ - "# For the test_right_label test we are adding, all the parameters are specified except model\n", - "# This means that we will need to specify model everytime we run the suite: model is a global parameter of the suite\n", - "suite = test_suite \\\n", - " .add_test(\n", - " giskard.testing.test_right_label(dataset=dataset.iloc[[1]], classification_label=\"yes\", threshold=1)).run()" - ] - }, - { - "cell_type": "markdown", - "id": "cf824254", - "metadata": {}, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "
\n", - "Install Giskard Server\n", - "\n", - "To upload your suite to the Giskard Server you must first run the Giskard Server. Refer to the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html).\n", - "
\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "- Compare models to decide which model to promote\n", - "- Debug your tests to diagnose the issues\n", - "- Create more domain-specific tests that are integrating business feedback\n", - "- Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8efd6bf3", - "metadata": {}, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "client = giskard.GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ], - "metadata": { - "collapsed": false - }, - "id": "7f594b5a762b09" - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/example_notebook.ipynb b/python-client/docs/reference/notebooks/example_notebook.ipynb index 5cd3bd3d43..7d07511dc7 100644 --- a/python-client/docs/reference/notebooks/example_notebook.ipynb +++ b/python-client/docs/reference/notebooks/example_notebook.ipynb @@ -1,2774 +1,2774 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "990eccb8", - "metadata": {}, - "source": [ - "# Quickstart\n", - "\n", - "
\n", - "What is Giskard ?\n", - "\n", - "Giskard is an open-source testing framework dedicated to ML models, ranging from tabular to LLM. [To know more about Giskard, click here](https://docs.giskard.ai/en/latest/getting-started/index.html).\n", - "
\n" - ] - }, - { - "cell_type": "markdown", - "id": "40f23d1a", - "metadata": {}, - "source": [ - "\n", - "By running this notebook, you'll create a whole test suite in a few lines of code. The model used here is a simple classification model with the Titanic dataset. Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You'll learn how to:\n", - "\n", - "- Detect vulnerabilities by scanning the model\n", - "\n", - "- Generate a test suite with domain-specific tests\n", - "\n", - "- Customize your test suite by loading a test from the Giskard catalog\n", - "\n", - "- Upload your model to the Giskard server to:\n", - "\n", - " - Compare models to decide which one to promote\n", - "\n", - " - Debug your tests to diagnose issues\n", - "\n", - " - Share your results and collect business feedback from your team\n" - ] - }, - { - "cell_type": "markdown", - "id": "da9a6eac", - "metadata": {}, - "source": [ - "## Install Giskard\n", - "\n", - "To see the list of Python requirements, please refer to [the documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan your model to find vulnerabilities\n", - "\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "139b5554", - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [], - "source": [ - "import giskard\n", - "from giskard.demo import titanic\n", - "\n", - "# Replace this with your own data & model creation.\n", - "model, df = titanic()\n", - "\n", - "# Wrap your Pandas DataFrame with Giskard.Dataset, containing examples such as:\n", - "# your test set, a golden dataset, etc.\n", - "giskard_dataset = giskard.Dataset(df=df, target=\"Survived\", name=\"Titanic dataset\")\n", - "\n", - "# Wrap your model with Giskard.Model:\n", - "# you can use any tabular, text or LLM models (PyTorch, HuggingFace, LangChain, etc.),\n", - "# for classification, regression & text generation.\n", - "giskard_model = giskard.Model(model=model, model_type=\"classification\", name=\"Titanic model\")\n", - "\n", - "# Then apply the scan\n", - "results = giskard.scan(giskard_model, giskard_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "ecb49fa5", - "metadata": {}, - "outputs": [ + "cells": [ + { + "cell_type": "markdown", + "id": "990eccb8", + "metadata": {}, + "source": [ + "# Quickstart\n", + "\n", + "
\n", + "What is Giskard ?\n", + "\n", + "Giskard is an open-source testing framework dedicated to ML models, ranging from tabular to LLM. [To know more about Giskard, click here](https://docs.giskard.ai/en/latest/getting-started/index.html).\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "id": "40f23d1a", + "metadata": {}, + "source": [ + "\n", + "By running this notebook, you'll create a whole test suite in a few lines of code. The model used here is a simple classification model with the Titanic dataset. Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You'll learn how to:\n", + "\n", + "- Detect vulnerabilities by scanning the model\n", + "\n", + "- Generate a test suite with domain-specific tests\n", + "\n", + "- Customize your test suite by loading a test from the Giskard catalog\n", + "\n", + "- Upload your model to the Giskard server to:\n", + "\n", + " - Compare models to decide which one to promote\n", + "\n", + " - Debug your tests to diagnose issues\n", + "\n", + " - Share your results and collect business feedback from your team\n" + ] + }, + { + "cell_type": "markdown", + "id": "da9a6eac", + "metadata": {}, + "source": [ + "## Install Giskard\n", + "\n", + "To see the list of Python requirements, please refer to [the documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Scan your model to find vulnerabilities\n", + "\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "139b5554", + "metadata": { + "pycharm": { + "is_executing": true + } + }, + "outputs": [], + "source": [ + "import giskard\n", + "from giskard.demo import titanic\n", + "\n", + "# Replace this with your own data & model creation.\n", + "model, df = titanic()\n", + "\n", + "# Wrap your Pandas DataFrame with Giskard.Dataset, containing examples such as:\n", + "# your test set, a golden dataset, etc.\n", + "giskard_dataset = giskard.Dataset(df=df, target=\"Survived\", name=\"Titanic dataset\")\n", + "\n", + "# Wrap your model with Giskard.Model:\n", + "# you can use any tabular, text or LLM models (PyTorch, HuggingFace, LangChain, etc.),\n", + "# for classification, regression & text generation.\n", + "giskard_model = giskard.Model(model=model, model_type=\"classification\", name=\"Titanic model\")\n", + "\n", + "# Then apply the scan\n", + "results = giskard.scan(giskard_model, giskard_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ecb49fa5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results) # in your notebook" + ] + }, + { + "cell_type": "markdown", + "id": "8b3343fa", + "metadata": {}, + "source": [ + "As you see above, the model may detect various vulnerabilites by displaying:\n", + "\n", + "* Data slices showing unperformance, underconfidence, overconfidence or spurious correlations\n", + "* Data transformations creating robutness or ethical issues\n", + "* Examples making some tests fail\n" + ] + }, { - "data": { - "text/html": [ - "\n", - "\n" + "cell_type": "markdown", + "id": "0667bdc9", + "metadata": {}, + "source": [ + "## Generate a test suite from the Scan\n", + "\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the [Test your ML Model](https://docs.giskard.ai/en/latest/guides/test-suite/index.html) page." ] - }, - "metadata": {}, - "output_type": "display_data" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bea736a9", + "metadata": {}, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "\n", + "# You can run the test suite locally to verify that it reproduces the issues\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "id": "882f4638", + "metadata": {}, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog" + ] + }, + { + "cell_type": "markdown", + "id": "502a0767", + "metadata": {}, + "source": [ + "The Giskard open source catalog will enable to load:\n", + "\n", + "* **Tests** such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* **Slicing functions** such as detectors of toxicity, hate, emotion, etc\n", + "* **Transformation functions** such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test ([test_right_label](https://docs.giskard.ai/en/latest/reference/tests/statistic.html#giskard.testing.test_right_label)) that checks if a given row (the first example) has the right label. For more examples of tests and functions, refer to the [Giskard catalog](https://docs.giskard.ai/en/latest/guides/catalog/index.html)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0c3a076", + "metadata": {}, + "outputs": [], + "source": [ + "# For the test_right_label test we are adding, all the parameters are specified except model\n", + "# This means that we will need to specify model everytime we run the suite: model is a global parameter of the suite\n", + "suite = test_suite \\\n", + " .add_test(\n", + " giskard.testing.test_right_label(dataset=giskard_dataset.iloc[[1]], classification_label=\"yes\", threshold=1)).run()" + ] + }, + { + "cell_type": "markdown", + "id": "cf824254", + "metadata": {}, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "
\n", + "Install Giskard Server\n", + "\n", + "To upload your suite to the Giskard Server you must first run the Giskard Server. Refer to the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html).\n", + "
\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "- Compare models to decide which model to promote\n", + "- Debug your tests to diagnose the issues\n", + "- Create more domain-specific tests that are integrating business feedback\n", + "- Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8efd6bf3", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "#Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" + ], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "venv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" } - ], - "source": [ - "display(results) # in your notebook" - ] - }, - { - "cell_type": "markdown", - "id": "8b3343fa", - "metadata": {}, - "source": [ - "As you see above, the model may detect various vulnerabilites by displaying:\n", - "\n", - "* Data slices showing unperformance, underconfidence, overconfidence or spurious correlations\n", - "* Data transformations creating robutness or ethical issues\n", - "* Examples making some tests fail\n" - ] - }, - { - "cell_type": "markdown", - "id": "0667bdc9", - "metadata": {}, - "source": [ - "## Generate a test suite from the Scan\n", - "\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the [Test your ML Model](https://docs.giskard.ai/en/latest/guides/test-suite/index.html) page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bea736a9", - "metadata": {}, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "\n", - "# You can run the test suite locally to verify that it reproduces the issues\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "id": "882f4638", - "metadata": {}, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog" - ] - }, - { - "cell_type": "markdown", - "id": "502a0767", - "metadata": {}, - "source": [ - "The Giskard open source catalog will enable to load:\n", - "\n", - "* **Tests** such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* **Slicing functions** such as detectors of toxicity, hate, emotion, etc\n", - "* **Transformation functions** such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test ([test_right_label](https://docs.giskard.ai/en/latest/reference/tests/statistic.html#giskard.testing.test_right_label)) that checks if a given row (the first example) has the right label. For more examples of tests and functions, refer to the [Giskard catalog](https://docs.giskard.ai/en/latest/guides/catalog/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b0c3a076", - "metadata": {}, - "outputs": [], - "source": [ - "# For the test_right_label test we are adding, all the parameters are specified except model\n", - "# This means that we will need to specify model everytime we run the suite: model is a global parameter of the suite\n", - "suite = test_suite \\\n", - " .add_test(\n", - " giskard.testing.test_right_label(dataset=giskard_dataset.iloc[[1]], classification_label=\"yes\", threshold=1)).run()" - ] - }, - { - "cell_type": "markdown", - "id": "cf824254", - "metadata": {}, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "
\n", - "Install Giskard Server\n", - "\n", - "To upload your suite to the Giskard Server you must first run the Giskard Server. Refer to the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html).\n", - "
\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "- Compare models to decide which model to promote\n", - "- Debug your tests to diagnose the issues\n", - "- Create more domain-specific tests that are integrating business feedback\n", - "- Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8efd6bf3", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "#Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ], - "metadata": { - "collapsed": false - } - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "venv" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/fake_real_news_classification.ipynb b/python-client/docs/reference/notebooks/fake_real_news_classification.ipynb index 2c457b5b32..3bca503014 100644 --- a/python-client/docs/reference/notebooks/fake_real_news_classification.ipynb +++ b/python-client/docs/reference/notebooks/fake_real_news_classification.ipynb @@ -1,2104 +1,2104 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Fake/real news classification [tensorflow (keras)]\n", - "* Binary classification of news being fake or real, based on their text.\n", - "* Reference notebook: \n", - "* Dataset: \n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is an LSTM neural network, which predicts, whether news is real or fake based on its contents. Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-23T15:04:52.744167Z", - "start_time": "2023-08-23T15:04:52.232875Z" + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fake/real news classification [tensorflow (keras)]\n", + "* Binary classification of news being fake or real, based on their text.\n", + "* Reference notebook: \n", + "* Dataset: \n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is an LSTM neural network, which predicts, whether news is real or fake based on its contents. 
Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# !pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-23T15:04:52.884200Z", - "start_time": "2023-08-23T15:04:52.539341Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install Giskard" + ] }, - "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", - "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", - "trusted": true - }, - "outputs": [], - "source": [ - "import os\n", - "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'\n", - "import random\n", - "import string\n", - "from pathlib import Path\n", - "from typing import Tuple, Callable\n", - "from urllib.request import urlretrieve\n", - "\n", - "import numpy as np\n", - "import keras.utils\n", - "import pandas as pd\n", - "from nltk.corpus import stopwords\n", - "from keras.optimizers import Adam\n", - "from keras.models import Sequential\n", - "from keras.utils import pad_sequences\n", - "from sklearn.metrics import accuracy_score\n", - "from keras.preprocessing.text import Tokenizer\n", - "from keras.layers import Dense, Embedding, LSTM\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "import giskard\n", - "from giskard import Dataset, Model, testing\n", - "from giskard.client.giskard_client import GiskardClient" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Define constants" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-23T15:04:55.532318Z", - "start_time": "2023-08-23T15:04:53.629211Z" + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-23T15:04:52.744167Z", + "start_time": "2023-08-23T15:04:52.232875Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# !pip install \"giskard>=2.0.0b\" -U" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# Constants.\n", - "MAX_TOKENS = 20000\n", - "MAX_SEQUENCE_LENGTH = 100\n", - "N_ROWS = 2000\n", - "\n", - "STOPWORDS = stopwords.words('english')\n", - "\n", - "TEXT_COLUMN_NAME = \"text\"\n", - "TARGET_COLUMN_NAME = \"isFake\"\n", - "\n", - "RANDOM_SEED = 0\n", - "\n", - "# Paths.\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/fake_real_news_dataset/{}\"\n", - "DATA_PATH = Path.home() / \".giskard\" / \"fake_real_news_dataset\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Load and preprocess data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "trusted": true - }, - "outputs": [], - "source": [ - "def fetch_from_ftp(url: str, file: Path) -> 
None:\n", - " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " print(f\"Downloading data from {url}\")\n", - " urlretrieve(url, file)\n", - "\n", - " print(f\"Data was loaded!\")\n", - "\n", - "\n", - "def fetch_dataset() -> None:\n", - " \"\"\"Gradually fetch all necessary files from the FTP server.\"\"\"\n", - " files_to_fetch = (\"Fake.csv\", \"True.csv\", \"glove_100d.txt\")\n", - " for file_name in files_to_fetch:\n", - " fetch_from_ftp(DATA_URL.format(file_name), DATA_PATH / file_name)\n", - "\n", - "\n", - "def load_data(**kwargs) -> pd.DataFrame:\n", - " \"\"\"Load data.\"\"\"\n", - " real_df = pd.read_csv(DATA_PATH / \"True.csv\", **kwargs)\n", - " fake_df = pd.read_csv(DATA_PATH / \"Fake.csv\", **kwargs)\n", - " \n", - " # Create target column.\n", - " real_df[TARGET_COLUMN_NAME] = 0\n", - " fake_df[TARGET_COLUMN_NAME] = 1\n", - "\n", - " # Combine dfs.\n", - " full_df = pd.concat([real_df, fake_df])\n", - " full_df.drop(columns=[\"subject\", \"date\"], inplace=True)\n", - " return full_df\n", - "\n", - "\n", - "fetch_dataset()\n", - "news_df = load_data(nrows=N_ROWS)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Train-test split" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-23T15:04:56.290114Z", - "start_time": "2023-08-23T15:04:56.274090Z" + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import libraries" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "X_train, X_test, Y_train, Y_test = train_test_split(news_df[[\"title\", TEXT_COLUMN_NAME]], news_df[TARGET_COLUMN_NAME], random_state=RANDOM_SEED)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap data with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-23T15:04:56.377103Z", - "start_time": "2023-08-23T15:04:56.304323Z" + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-23T15:04:52.884200Z", + "start_time": "2023-08-23T15:04:52.539341Z" + }, + "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", + "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", + "trusted": true + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'\n", + "import random\n", + "import string\n", + "from pathlib import Path\n", + "from typing import Tuple, Callable\n", + "from urllib.request import urlretrieve\n", + "\n", + "import numpy as np\n", + "import keras.utils\n", + "import pandas as pd\n", + "from nltk.corpus import stopwords\n", + "from keras.optimizers import Adam\n", + "from keras.models import Sequential\n", + "from keras.utils import pad_sequences\n", + "from sklearn.metrics import accuracy_score\n", + "from keras.preprocessing.text import Tokenizer\n", + "from keras.layers import Dense, Embedding, LSTM\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "import giskard\n", + "from giskard import Dataset, Model, testing\n", + "from giskard.client.giskard_client import GiskardClient" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "raw_data = pd.concat([X_test, Y_test], 
axis=1)\n", - "wrapped_data = Dataset(\n", - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=TARGET_COLUMN_NAME, # Ground truth variable.\n", - " name=\"fake_and_real_news\" # Optional.\n", - ") " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Define preprocessing steps" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-23T15:05:11.521090Z", - "start_time": "2023-08-23T15:04:57.535950Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Define constants" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def prepare_text(df: pd.DataFrame) -> np.ndarray:\n", - " \"\"\"Perform text-data cleaning: punctuation and stop words removal.\"\"\"\n", - " # Merge text data into single column.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME] + \" \" + df.title\n", - " df.drop(columns=[\"title\"], inplace=True)\n", - "\n", - " # Remove punctuation.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(\n", - " lambda text: text.translate(str.maketrans('', '', string.punctuation)))\n", - "\n", - " # Remove stop words.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(\n", - " lambda sentence: ' '.join([_word for _word in sentence.split() if _word.lower() not in STOPWORDS]))\n", - "\n", - " return df[TEXT_COLUMN_NAME]\n", - "\n", - "\n", - "X_train_prepared = prepare_text(X_train)\n", - "X_test_prepared = prepare_text(X_test)\n", - "\n", - "\n", - "def init_tokenizer() -> Tuple[Callable, Tokenizer]:\n", - " \"\"\"Initialize tokenization function with the Tokenizer in it's outer-scope.\"\"\"\n", - " tokenizer = Tokenizer(num_words=MAX_TOKENS)\n", - " tokenizer.fit_on_texts(X_train_prepared)\n", - "\n", - "\n", - " def tokenization_closure(df: pd.DataFrame) -> pd.DataFrame:\n", - " tokenized = tokenizer.texts_to_sequences(df)\n", - " return pad_sequences(tokenized, maxlen=MAX_SEQUENCE_LENGTH)\n", - "\n", - "\n", - " return tokenization_closure, tokenizer\n", - "\n", - "\n", - "tokenize, text_tokenizer = init_tokenizer()\n", - "X_train_tokens = tokenize(X_train_prepared)\n", - "X_test_tokens = tokenize(X_test_prepared)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Create embeddings matrix" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-23T15:05:53.293359Z", - "start_time": "2023-08-23T15:05:11.522965Z" + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-23T15:04:55.532318Z", + "start_time": "2023-08-23T15:04:53.629211Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# Constants.\n", + "MAX_TOKENS = 20000\n", + "MAX_SEQUENCE_LENGTH = 100\n", + "N_ROWS = 2000\n", + "\n", + "STOPWORDS = stopwords.words('english')\n", + "\n", + "TEXT_COLUMN_NAME = \"text\"\n", + "TARGET_COLUMN_NAME = \"isFake\"\n", + "\n", + "RANDOM_SEED = 0\n", + "\n", + "# Paths.\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/fake_real_news_dataset/{}\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"fake_real_news_dataset\"" + ] }, - "_kg_hide-output": true, - "trusted": true - }, - "outputs": [], 
- "source": [ - "def parse_line(word: str, *arr: list) -> Tuple[str, np.ndarray]:\n", - " \"\"\"Parse line from the file with embeddings.\n", - " The first value of the line is the word and the rest values are related glove embedding: (, 0.66, 0.23, ...).\"\"\"\n", - " return word, np.asarray(arr, dtype='float32')\n", - "\n", - "\n", - "def init_embeddings_matrix(embeddings_dict: dict) -> np.ndarray:\n", - " \"\"\"Init a matrix, where each row is an embedding vector.\"\"\"\n", - " num_embeddings = min(MAX_TOKENS, len(text_tokenizer.word_index))\n", - " stacked_embeddings = np.stack(list(embeddings_dict.values()))\n", - " embeddings_mean, embeddings_std, embeddings_dimension = stacked_embeddings.mean(), stacked_embeddings.std(), stacked_embeddings.shape[1]\n", - " embeddings_matrix = np.random.normal(embeddings_mean, embeddings_std, (num_embeddings, embeddings_dimension))\n", - " return embeddings_matrix\n", - "\n", - "\n", - "def get_embeddings_matrix() -> np.ndarray:\n", - " \"\"\"Create matrix, where each row is an embedding of a specific word.\"\"\"\n", - " # Load glove embeddings.\n", - " embeddings_dict = dict(parse_line(*line.rstrip().rsplit(' ')) for line in open(DATA_PATH / \"glove_100d.txt\"))\n", - "\n", - " # Create embeddings matrix with glove word vectors.\n", - " embeddings_matrix = init_embeddings_matrix(embeddings_dict)\n", - " for word, idx in text_tokenizer.word_index.items():\n", - " if idx >= MAX_TOKENS:\n", - " continue\n", - "\n", - " embedding_vector = embeddings_dict.get(word, None)\n", - "\n", - " if embedding_vector is not None:\n", - " embeddings_matrix[idx] = embedding_vector\n", - " \n", - " return embeddings_matrix\n", - "\n", - "\n", - "embed_matrix = get_embeddings_matrix()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Build estimator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "trusted": true - }, - "outputs": [], - "source": [ - "def init_model() -> Sequential:\n", - " \"\"\"Initialize new TF model.\"\"\"\n", - " # Define model container.\n", - " model = Sequential()\n", - "\n", - " # Non-trainable embedding layer.\n", - " model.add(Embedding(MAX_TOKENS, output_dim=100, weights=[embed_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False))\n", - "\n", - " # LSTM stage.\n", - " model.add(LSTM(units=32 , return_sequences=True , recurrent_dropout=0.25, dropout=0.25))\n", - " model.add(LSTM(units=16 , recurrent_dropout=0.1 , dropout=0.1))\n", - "\n", - " # Dense stage.\n", - " model.add(Dense(units=16 , activation='relu'))\n", - " model.add(Dense(units=1, activation='sigmoid'))\n", - "\n", - " # Build model.\n", - " model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])\n", - " return model\n", - "\n", - "\n", - "# Fit model.\n", - "n_epochs = 5\n", - "batch_size = 256\n", - "\n", - "classifier = init_model()\n", - "_ = classifier.fit(X_train_tokens, Y_train, batch_size=batch_size, validation_data=(X_test_tokens, Y_test), epochs=n_epochs)\n", - "\n", - "train_metric = classifier.evaluate(X_train_tokens, Y_train, verbose=0)[1]\n", - "test_metric = classifier.evaluate(X_test_tokens, Y_test, verbose=0)[1]\n", - "\n", - "print(f\"Train accuracy: {train_metric: .4f}\")\n", - "print(f\"Test accuracy: {test_metric: .4f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap model with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def prediction_function(df: pd.DataFrame) -> np.ndarray:\n", - " \"\"\"Define a prediction function for giskard.Model.\"\"\"\n", - " tokens = tokenize(prepare_text(df))\n", - " return classifier.predict(tokens, verbose=0)\n", - "\n", - "\n", - "wrapped_model = Model(\n", - " model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"fake_real_news_classification\", # Optional.\n", - " feature_names=[\"title\", TEXT_COLUMN_NAME], # Default: all columns of your dataset.\n", - " classification_labels=[0, 1], # Their order MUST be identical to the prediction_function's output order. \n", - " # classification_threshold=0.5 # Default: 0.5\n", - ")\n", - "\n", - "# Evaluate wrapped model.\n", - "Y_test_pred_wrapper = wrapped_model.predict(wrapped_data).prediction\n", - "wrapped_test_metric = accuracy_score(Y_test, Y_test_pred_wrapper)\n", - "print(f\"Wrapped test accuracy: {wrapped_test_metric: .4f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "scanning_results = giskard.scan(wrapped_model, wrapped_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-23T14:07:07.021073Z", - "start_time": "2023-08-23T14:07:03.165294Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Dataset preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load and preprocess data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", + " if not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " print(f\"Downloading data from {url}\")\n", + " urlretrieve(url, file)\n", + "\n", + " print(f\"Data was loaded!\")\n", + "\n", + "\n", + "def fetch_dataset() -> None:\n", + " \"\"\"Gradually fetch all necessary files from the FTP server.\"\"\"\n", + " files_to_fetch = (\"Fake.csv\", \"True.csv\", \"glove_100d.txt\")\n", + " for file_name in files_to_fetch:\n", + " fetch_from_ftp(DATA_URL.format(file_name), DATA_PATH / file_name)\n", + "\n", + "\n", + "def load_data(**kwargs) -> pd.DataFrame:\n", + " \"\"\"Load data.\"\"\"\n", + " real_df = pd.read_csv(DATA_PATH / \"True.csv\", **kwargs)\n", + " fake_df = pd.read_csv(DATA_PATH / \"Fake.csv\", **kwargs)\n", + " \n", + " # Create target column.\n", + " real_df[TARGET_COLUMN_NAME] = 0\n", + " fake_df[TARGET_COLUMN_NAME] = 1\n", + "\n", + " # Combine dfs.\n", + " full_df = pd.concat([real_df, fake_df])\n", + " full_df.drop(columns=[\"subject\", \"date\"], inplace=True)\n", + " return full_df\n", + "\n", + "\n", + "fetch_dataset()\n", + "news_df = load_data(nrows=N_ROWS)" + ] }, - "collapsed": false - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Train-test split" + ] + }, + { + 
"cell_type": "code", + "execution_count": 25, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-23T15:04:56.290114Z", + "start_time": "2023-08-23T15:04:56.274090Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(news_df[[\"title\", TEXT_COLUMN_NAME]], news_df[TARGET_COLUMN_NAME], random_state=RANDOM_SEED)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap data with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-23T15:04:56.377103Z", + "start_time": "2023-08-23T15:04:56.304323Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "raw_data = pd.concat([X_test, Y_test], axis=1)\n", + "wrapped_data = Dataset(\n", + " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=TARGET_COLUMN_NAME, # Ground truth variable.\n", + " name=\"fake_and_real_news\" # Optional.\n", + ") " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Define preprocessing steps" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-23T15:05:11.521090Z", + "start_time": "2023-08-23T15:04:57.535950Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def prepare_text(df: pd.DataFrame) -> np.ndarray:\n", + " \"\"\"Perform text-data cleaning: punctuation and stop words removal.\"\"\"\n", + " # Merge text data into single column.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME] + \" \" + df.title\n", + " df.drop(columns=[\"title\"], inplace=True)\n", + "\n", + " # Remove punctuation.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(\n", + " lambda text: text.translate(str.maketrans('', '', string.punctuation)))\n", + "\n", + " # Remove stop words.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(\n", + " lambda sentence: ' '.join([_word for _word in sentence.split() if _word.lower() not in STOPWORDS]))\n", + "\n", + " return df[TEXT_COLUMN_NAME]\n", + "\n", + "\n", + "X_train_prepared = prepare_text(X_train)\n", + "X_test_prepared = prepare_text(X_test)\n", + "\n", + "\n", + "def init_tokenizer() -> Tuple[Callable, Tokenizer]:\n", + " \"\"\"Initialize tokenization function with the Tokenizer in it's outer-scope.\"\"\"\n", + " tokenizer = Tokenizer(num_words=MAX_TOKENS)\n", + " tokenizer.fit_on_texts(X_train_prepared)\n", + "\n", + "\n", + " def tokenization_closure(df: pd.DataFrame) -> pd.DataFrame:\n", + " tokenized = tokenizer.texts_to_sequences(df)\n", + " return pad_sequences(tokenized, maxlen=MAX_SEQUENCE_LENGTH)\n", + "\n", + "\n", + " return tokenization_closure, tokenizer\n", + "\n", + "\n", + "tokenize, text_tokenizer = init_tokenizer()\n", + "X_train_tokens = tokenize(X_train_prepared)\n", + "X_test_tokens = tokenize(X_test_prepared)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Create embeddings matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-23T15:05:53.293359Z", + "start_time": 
"2023-08-23T15:05:11.522965Z" + }, + "_kg_hide-output": true, + "trusted": true + }, + "outputs": [], + "source": [ + "def parse_line(word: str, *arr: list) -> Tuple[str, np.ndarray]:\n", + " \"\"\"Parse line from the file with embeddings.\n", + " The first value of the line is the word and the rest values are related glove embedding: (, 0.66, 0.23, ...).\"\"\"\n", + " return word, np.asarray(arr, dtype='float32')\n", + "\n", + "\n", + "def init_embeddings_matrix(embeddings_dict: dict) -> np.ndarray:\n", + " \"\"\"Init a matrix, where each row is an embedding vector.\"\"\"\n", + " num_embeddings = min(MAX_TOKENS, len(text_tokenizer.word_index))\n", + " stacked_embeddings = np.stack(list(embeddings_dict.values()))\n", + " embeddings_mean, embeddings_std, embeddings_dimension = stacked_embeddings.mean(), stacked_embeddings.std(), stacked_embeddings.shape[1]\n", + " embeddings_matrix = np.random.normal(embeddings_mean, embeddings_std, (num_embeddings, embeddings_dimension))\n", + " return embeddings_matrix\n", + "\n", + "\n", + "def get_embeddings_matrix() -> np.ndarray:\n", + " \"\"\"Create matrix, where each row is an embedding of a specific word.\"\"\"\n", + " # Load glove embeddings.\n", + " embeddings_dict = dict(parse_line(*line.rstrip().rsplit(' ')) for line in open(DATA_PATH / \"glove_100d.txt\"))\n", + "\n", + " # Create embeddings matrix with glove word vectors.\n", + " embeddings_matrix = init_embeddings_matrix(embeddings_dict)\n", + " for word, idx in text_tokenizer.word_index.items():\n", + " if idx >= MAX_TOKENS:\n", + " continue\n", + "\n", + " embedding_vector = embeddings_dict.get(word, None)\n", + "\n", + " if embedding_vector is not None:\n", + " embeddings_matrix[idx] = embedding_vector\n", + " \n", + " return embeddings_matrix\n", + "\n", + "\n", + "embed_matrix = get_embeddings_matrix()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Build estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "def init_model() -> Sequential:\n", + " \"\"\"Initialize new TF model.\"\"\"\n", + " # Define model container.\n", + " model = Sequential()\n", + "\n", + " # Non-trainable embedding layer.\n", + " model.add(Embedding(MAX_TOKENS, output_dim=100, weights=[embed_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False))\n", + "\n", + " # LSTM stage.\n", + " model.add(LSTM(units=32 , return_sequences=True , recurrent_dropout=0.25, dropout=0.25))\n", + " model.add(LSTM(units=16 , recurrent_dropout=0.1 , dropout=0.1))\n", + "\n", + " # Dense stage.\n", + " model.add(Dense(units=16 , activation='relu'))\n", + " model.add(Dense(units=1, activation='sigmoid'))\n", + "\n", + " # Build model.\n", + " model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])\n", + " return model\n", + "\n", + "\n", + "# Fit model.\n", + "n_epochs = 5\n", + "batch_size = 256\n", + "\n", + "classifier = init_model()\n", + "_ = classifier.fit(X_train_tokens, Y_train, batch_size=batch_size, validation_data=(X_test_tokens, Y_test), epochs=n_epochs)\n", + "\n", + "train_metric = classifier.evaluate(X_train_tokens, Y_train, verbose=0)[1]\n", + "test_metric = classifier.evaluate(X_test_tokens, Y_test, verbose=0)[1]\n", + "\n", + "print(f\"Train accuracy: {train_metric: .4f}\")\n", + "print(f\"Test accuracy: {test_metric: .4f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + 
"### Wrap model with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def prediction_function(df: pd.DataFrame) -> np.ndarray:\n", + " \"\"\"Define a prediction function for giskard.Model.\"\"\"\n", + " tokens = tokenize(prepare_text(df))\n", + " return classifier.predict(tokens, verbose=0)\n", + "\n", + "\n", + "wrapped_model = Model(\n", + " model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"fake_real_news_classification\", # Optional.\n", + " feature_names=[\"title\", TEXT_COLUMN_NAME], # Default: all columns of your dataset.\n", + " classification_labels=[0, 1], # Their order MUST be identical to the prediction_function's output order. \n", + " # classification_threshold=0.5 # Default: 0.5\n", + ")\n", + "\n", + "# Evaluate wrapped model.\n", + "Y_test_pred_wrapper = wrapped_model.predict(wrapped_data).prediction\n", + "wrapped_test_metric = accuracy_score(Y_test, Y_test_pred_wrapper)\n", + "print(f\"Wrapped test accuracy: {wrapped_test_metric: .4f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Scan model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "scanning_results = giskard.scan(wrapped_model, wrapped_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-23T14:07:07.021073Z", + "start_time": "2023-08-23T14:07:03.165294Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(scanning_results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite = scanning_results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "## Upload your suite to the Giskard server\n",
+ "\n",
+ "Upload your suite to the Giskard server to:\n",
+ "\n",
+ "* Compare models to decide which model to promote\n",
+ "* Debug your tests to diagnose the issues\n",
+ "* Create more domain-specific tests that integrate business feedback\n",
+ "* Share your results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n",
+ "# Create a Giskard client after having installed the Giskard server (see documentation)\n",
+ "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n",
+ "\n",
+ "client = GiskardClient(\n",
+ " url=\"http://localhost:19000\", # URL of your Giskard instance\n",
+ " token=token\n",
+ ")\n",
+ "\n",
+ "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n",
+ "\n",
+ "# Upload to the current project ✉️\n",
+ "test_suite.upload(client, \"my_project\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(scanning_results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite = scanning_results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/hotel_text_regression.ipynb b/python-client/docs/reference/notebooks/hotel_text_regression.ipynb index c57db2ff1d..ccd0b81ad9 100644 --- a/python-client/docs/reference/notebooks/hotel_text_regression.ipynb +++ b/python-client/docs/reference/notebooks/hotel_text_regression.ipynb @@ -1,1946 +1,1946 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "# Regression on the hotel reviews [sklearn]\n", - "* Regression task of predicting review 'score', based on the review text.\n", - "* Reference notebook: \n", - "* Dataset: \n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a simple linear regression model with the hotel reviews dataset. Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from typing import Iterable\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "from sklearn.ensemble import GradientBoostingRegressor\n", - "from sklearn.feature_extraction.text import TfidfVectorizer\n", - "from sklearn.metrics import mean_absolute_error\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import FunctionTransformer\n", - "\n", - "from giskard import Model, Dataset, scan, testing\n", - "from giskard.client.giskard_client import GiskardClient" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Define constants" - ] - }, - { - "cell_type": "code", 
- "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:23:32.219786Z", - "start_time": "2023-08-21T13:23:32.203261Z" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "# Regression on the hotel reviews [sklearn]\n", + "* Regression task of predicting review 'score', based on the review text.\n", + "* Reference notebook: \n", + "* Dataset: \n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a simple linear regression model with the hotel reviews dataset. Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# Constants.\n", - "FEATURE_COLUMN_NAME = \"Full_Review\"\n", - "TARGET_COLUMN_NAME = \"Reviewer_Score\"\n", - "\n", - "# Paths.\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/hotel_text_regression_dataset/Hotel_Reviews.csv\"\n", - "DATA_PATH = Path.home() / \".giskard\" / \"hotel_text_regression_dataset\" / \"Hotel_Reviews.csv\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_cell_guid": "2f8c1d94-114d-4b7b-a9eb-fd1761d27268", - "_uuid": "6a569b796e7aa56d58d20416b91b679d3737587b" - }, - "source": [ - "### Load data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:23:32.273659Z", - "start_time": "2023-08-21T13:23:32.220210Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install Giskard" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def fetch_from_ftp(url: str, file: Path) -> None:\n", - " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " print(f\"Downloading data from {url}\")\n", - " urlretrieve(url, file)\n", - "\n", - " print(f\"Data was loaded!\")\n", - "\n", - "\n", - "def load_data(**kwargs) -> pd.DataFrame:\n", - " fetch_from_ftp(DATA_URL, DATA_PATH)\n", - " df = pd.read_csv(DATA_PATH, **kwargs)\n", - "\n", - " # Create target column.\n", - " df[FEATURE_COLUMN_NAME] = df.apply(lambda x: x['Positive_Review'] + ' ' + x['Negative_Review'], axis=1)\n", - "\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "_cell_guid": "68914ed8-f065-450c-8899-f56542aed51d", - "_uuid": "ae9877cc3542e9372084a830a19c794863dff9cb" - }, - "outputs": [], - "source": [ - "reviews_df = load_data(nrows=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Train-test split" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:23:32.374452Z", - "start_time": "2023-08-21T13:23:32.338540Z" + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ] }, - "_cell_guid": "1fc3041b-4143-4913-be91-522a80491717", - "_uuid": "6edbd3a2e85aced1897d44dbabf74ebfecf10110" - }, - "outputs": [], - "source": [ - "train_X, test_X, train_Y, test_Y = train_test_split(reviews_df[[FEATURE_COLUMN_NAME]], reviews_df[TARGET_COLUMN_NAME],\n", - " random_state=42)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap dataset with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:24:10.470790Z", - "start_time": "2023-08-21T13:24:10.425203Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Import libraries" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "raw_data = pd.concat([test_X, test_Y], axis=1)\n", - "wrapped_data = Dataset(\n", - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=TARGET_COLUMN_NAME, # Ground truth variable.\n", - " name=\"hotel_text_regression_dataset\", # Optional.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Define preprocessing steps" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:24:13.246134Z", - "start_time": "2023-08-21T13:24:13.225691Z" + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from typing import Iterable\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "from sklearn.ensemble import GradientBoostingRegressor\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.metrics import mean_absolute_error\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import FunctionTransformer\n", + "\n", + "from giskard import Model, Dataset, scan, testing\n", + "from giskard.client.giskard_client import GiskardClient" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def adapt_vectorizer_input(df: pd.DataFrame) -> Iterable:\n", - " \"\"\"Adapt input for the vectorizers.\n", - "\n", - " The problem is that vectorizers accept iterable, not DataFrame, but Series.\n", - " Thus, we need to ravel dataframe with text have input single dimension.\n", - " \"\"\"\n", - "\n", - " df = df.iloc[:, 0]\n", - " return df" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "_cell_guid": "bec6edf8-eda4-4ac1-b5cb-61cccb4452f8", - "_uuid": "2c8a85cbab882133cea814280c2badada8cfb82a" - }, - "source": [ - "### Build estimator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Define pipeline.\n", - "pipeline = Pipeline(steps=[\n", - " (\"vectorizer_adapter\", FunctionTransformer(adapt_vectorizer_input)),\n", - " (\"vectorizer\", TfidfVectorizer(max_features=10000)),\n", - " (\"regressor\", GradientBoostingRegressor(n_estimators=10))\n", - "])\n", - "\n", - "# Fit 
pipeline.\n", - "pipeline.fit(train_X, train_Y)\n", - "\n", - "# Perform inference on train and test data.\n", - "pred_train = pipeline.predict(train_X)\n", - "pred_test = pipeline.predict(test_X)\n", - "\n", - "train_metric = mean_absolute_error(train_Y, pred_train)\n", - "test_metric = mean_absolute_error(test_Y, pred_test)\n", - "\n", - "print(f\"Train MAE: {train_metric: .2f}\\n\"\n", - " f\"Test MAE: {test_metric: .2f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap model with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:27:05.578728Z", - "start_time": "2023-08-21T13:27:05.532042Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Define constants" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "wrapped_model = Model(\n", - " model=pipeline.predict, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"regression\", # Either regression, classification or text_generation.\n", - " name=\"hotel_text_regression\", # Optional.\n", - " feature_names=[FEATURE_COLUMN_NAME] # Default: all columns of your dataset.\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Validate wrapped model.\n", - "pred_test_wrapped = wrapped_model.predict(wrapped_data).raw_prediction\n", - "wrapped_test_metric = mean_absolute_error(test_Y, pred_test_wrapped)\n", - "print(f\"Wrapped Test MAE: {wrapped_test_metric: .2f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan your model to find vulnerabilities\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "results = scan(wrapped_model, wrapped_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:27:31.846077Z", - "start_time": "2023-08-21T13:27:31.548602Z" + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:23:32.219786Z", + "start_time": "2023-08-21T13:23:32.203261Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# Constants.\n", + "FEATURE_COLUMN_NAME = \"Full_Review\"\n", + "TARGET_COLUMN_NAME = \"Reviewer_Score\"\n", + "\n", + "# Paths.\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/hotel_text_regression_dataset/Hotel_Reviews.csv\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"hotel_text_regression_dataset\" / \"Hotel_Reviews.csv\"" + ] }, - "collapsed": false - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Dataset preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "2f8c1d94-114d-4b7b-a9eb-fd1761d27268", + "_uuid": "6a569b796e7aa56d58d20416b91b679d3737587b" + }, + "source": [ + "### Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:23:32.273659Z", + "start_time": "2023-08-21T13:23:32.220210Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", + " if not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " print(f\"Downloading data from {url}\")\n", + " urlretrieve(url, file)\n", + "\n", + " print(f\"Data was loaded!\")\n", + "\n", + "\n", + "def load_data(**kwargs) -> pd.DataFrame:\n", + " fetch_from_ftp(DATA_URL, DATA_PATH)\n", + " df = pd.read_csv(DATA_PATH, **kwargs)\n", + "\n", + " # Create target column.\n", + " df[FEATURE_COLUMN_NAME] = df.apply(lambda x: x['Positive_Review'] + ' ' + x['Negative_Review'], axis=1)\n", + "\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "68914ed8-f065-450c-8899-f56542aed51d", + "_uuid": "ae9877cc3542e9372084a830a19c794863dff9cb" + }, + "outputs": [], + "source": [ + "reviews_df = load_data(nrows=1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Train-test split" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:23:32.374452Z", + "start_time": "2023-08-21T13:23:32.338540Z" + }, + "_cell_guid": "1fc3041b-4143-4913-be91-522a80491717", + "_uuid": "6edbd3a2e85aced1897d44dbabf74ebfecf10110" + }, + "outputs": [], + "source": [ + "train_X, test_X, train_Y, test_Y = train_test_split(reviews_df[[FEATURE_COLUMN_NAME]], reviews_df[TARGET_COLUMN_NAME],\n", + " random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap dataset with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:24:10.470790Z", + "start_time": "2023-08-21T13:24:10.425203Z" + }, + "collapsed": false + }, + "outputs": [], + 
"source": [ + "raw_data = pd.concat([test_X, test_Y], axis=1)\n", + "wrapped_data = Dataset(\n", + " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=TARGET_COLUMN_NAME, # Ground truth variable.\n", + " name=\"hotel_text_regression_dataset\", # Optional.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Define preprocessing steps" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:24:13.246134Z", + "start_time": "2023-08-21T13:24:13.225691Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def adapt_vectorizer_input(df: pd.DataFrame) -> Iterable:\n", + " \"\"\"Adapt input for the vectorizers.\n", + "\n", + " The problem is that vectorizers accept iterable, not DataFrame, but Series.\n", + " Thus, we need to ravel dataframe with text have input single dimension.\n", + " \"\"\"\n", + "\n", + " df = df.iloc[:, 0]\n", + " return df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "bec6edf8-eda4-4ac1-b5cb-61cccb4452f8", + "_uuid": "2c8a85cbab882133cea814280c2badada8cfb82a" + }, + "source": [ + "### Build estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Define pipeline.\n", + "pipeline = Pipeline(steps=[\n", + " (\"vectorizer_adapter\", FunctionTransformer(adapt_vectorizer_input)),\n", + " (\"vectorizer\", TfidfVectorizer(max_features=10000)),\n", + " (\"regressor\", GradientBoostingRegressor(n_estimators=10))\n", + "])\n", + "\n", + "# Fit pipeline.\n", + "pipeline.fit(train_X, train_Y)\n", + "\n", + "# Perform inference on train and test data.\n", + "pred_train = pipeline.predict(train_X)\n", + "pred_test = pipeline.predict(test_X)\n", + "\n", + "train_metric = mean_absolute_error(train_Y, pred_train)\n", + "test_metric = mean_absolute_error(test_Y, pred_test)\n", + "\n", + "print(f\"Train MAE: {train_metric: .2f}\\n\"\n", + " f\"Test MAE: {test_metric: .2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap model with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:27:05.578728Z", + "start_time": "2023-08-21T13:27:05.532042Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "wrapped_model = Model(\n", + " model=pipeline.predict, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"regression\", # Either regression, classification or text_generation.\n", + " name=\"hotel_text_regression\", # Optional.\n", + " feature_names=[FEATURE_COLUMN_NAME] # Default: all columns of your dataset.\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Validate wrapped model.\n", + "pred_test_wrapped = wrapped_model.predict(wrapped_data).raw_prediction\n", + "wrapped_test_metric = mean_absolute_error(test_Y, pred_test_wrapped)\n", + "print(f\"Wrapped Test MAE: {wrapped_test_metric: .2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"collapsed": false + }, + "source": [ + "## Scan your model to find vulnerabilities\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "results = scan(wrapped_model, wrapped_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:27:31.846077Z", + "start_time": "2023-08-21T13:27:31.548602Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_r2) that checks if the test R2 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "test_suite.add_test(testing.test_r2(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "## Upload your suite to the Giskard server\n",
+ "\n",
+ "Upload your suite to the Giskard server to:\n",
+ "\n",
+ "* Compare models to decide which model to promote\n",
+ "* Debug your tests to diagnose the issues\n",
+ "* Create more domain-specific tests that integrate business feedback\n",
+ "* Share your results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n",
+ "# Create a Giskard client after having installed the Giskard server (see documentation)\n",
+ "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n",
+ "\n",
+ "client = GiskardClient(\n",
+ " url=\"http://localhost:19000\", # URL of your Giskard instance\n",
+ " token=token\n",
+ ")\n",
+ "\n",
+ "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n",
+ "\n",
+ "# Upload to the current project ✉️\n",
+ "test_suite.upload(client, \"my_project\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_r2) that checks if the test R2 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_r2(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb b/python-client/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb index 07f400bffe..2daa46cc2a 100644 --- a/python-client/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb +++ b/python-client/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb @@ -1,2799 +1,2799 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "# IEEE Fraud detection adversarial validation [lightgbm]\n", - "* IEEE Fraud detection train/test data binary classification task.\n", - "* Reference notebook: \n", - "* Dataset: \n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a LGBM classification model with the IEEE fraud detection dataset. 
Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Troubleshooting\n", - "\n", - "If you encounter a segmentation fault on macOS at any point during this tutorial, check: https://docs.giskard.ai/en/latest/contribute/dev-environment.html" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "from lightgbm import LGBMClassifier\n", - "from pandas.api.types import union_categoricals\n", - "from sklearn.metrics import roc_auc_score\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "import giskard\n", - "from giskard import GiskardClient, testing, Dataset, Model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Define constants" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:28:55.240118Z", - "start_time": "2023-08-21T13:28:55.183812Z" + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "# IEEE Fraud detection adversarial validation [lightgbm]\n", + "* IEEE Fraud detection train/test data binary classification task.\n", + "* Reference notebook: \n", + "* Dataset: \n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a LGBM classification model with the IEEE fraud detection dataset. 
Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# Constants.\n", - "TARGET_COLUMN = 'isTest'\n", - "IDX_LABEL = 'TransactionID'\n", - "\n", - "# Paths.\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/fraud_detection_classification_dataset/{}\"\n", - "DATA_PATH = Path.home() / \".giskard\" / \"fraud_detection_classification_dataset\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Load and preprocess data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:28:55.269180Z", - "start_time": "2023-08-21T13:28:55.205245Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install Giskard" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def fetch_from_ftp(url: str, file: Path) -> None:\n", - " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " print(f\"Downloading data from {url}\")\n", - " urlretrieve(url, file)\n", - "\n", - " print(f\"Data was loaded!\")\n", - "\n", - "\n", - "def fetch_dataset():\n", - " files_to_fetch = [\"train_transaction.csv\", \"train_identity.csv\", \"test_transaction.csv\", \"test_identity.csv\"]\n", - " for file_name in files_to_fetch:\n", - " fetch_from_ftp(DATA_URL.format(file_name), DATA_PATH / file_name)\n", - "\n", - "\n", - "# Define data-types of transactions features.\n", - "DATA_TYPES_TRANSACTION = {\n", - " 'TransactionID': 'int32',\n", - " 'isFraud': 'int8',\n", - " 'TransactionDT': 'int32',\n", - " 'TransactionAmt': 'float32',\n", - " 'ProductCD': 'category',\n", - " 'card1': 'int16',\n", - " 'card2': 'float32',\n", - " 'card3': 'float32',\n", - " 'card4': 'category',\n", - " 'card5': 'float32',\n", - " 'card6': 'category',\n", - " 'addr1': 'float32',\n", - " 'addr2': 'float32',\n", - " 'dist1': 'float32',\n", - " 'dist2': 'float32',\n", - " 'P_emaildomain': 'category',\n", - " 'R_emaildomain': 'category',\n", - "}\n", - "\n", - "C_COLS = [f'C{i}' for i in range(1, 15)]\n", - "D_COLS = [f'D{i}' for i in range(1, 16)]\n", - "M_COLS = [f'M{i}' for i in range(1, 10)]\n", - "V_COLS = [f'V{i}' for i in range(1, 340)]\n", - "\n", - "DATA_TYPES_TRANSACTION.update((c, 'float32') for c in C_COLS)\n", - "DATA_TYPES_TRANSACTION.update((c, 'float32') for c in D_COLS)\n", - "DATA_TYPES_TRANSACTION.update((c, 'float32') for c in V_COLS)\n", - "DATA_TYPES_TRANSACTION.update((c, 'category') for c in M_COLS)\n", - "\n", - "# Define datatypes of identity features.\n", - "DATA_TYPES_ID = {\n", - " 'TransactionID': 'int32',\n", - " 'DeviceType': 'category',\n", - " 'DeviceInfo': 'category',\n", - "}\n", - "\n", - "ID_COLS = [f'id_{i:02d}' for i in range(1, 39)]\n", - "ID_CATS = [\n", 
- " 'id_12', 'id_15', 'id_16', 'id_23', 'id_27', 'id_28', 'id_29', 'id_30',\n", - " 'id_31', 'id_33', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38'\n", - "]\n", - "\n", - "DATA_TYPES_ID.update(((c, 'float32') for c in ID_COLS))\n", - "DATA_TYPES_ID.update(((c, 'category') for c in ID_CATS))\n", - "\n", - "# Define list of all categorical features.\n", - "CATEGORICALS = [f_name for (f_name, f_type) in dict(DATA_TYPES_TRANSACTION, **DATA_TYPES_ID).items() if\n", - " f_type == \"category\"]\n", - "\n", - "\n", - "def read_set(_type):\n", - " \"\"\"Read both transactions and identity data.\"\"\"\n", - " print(f\"Reading transactions data...\")\n", - " _df = pd.read_csv(os.path.join(DATA_PATH, f'{_type}_transaction.csv'),\n", - " index_col=IDX_LABEL, dtype=DATA_TYPES_TRANSACTION, nrows=250)\n", - "\n", - " print(f\"Reading identity data...\")\n", - " _df = _df.join(pd.read_csv(os.path.join(DATA_PATH, f'{_type}_identity.csv'),\n", - " index_col=IDX_LABEL, dtype=DATA_TYPES_ID))\n", - " return _df\n", - "\n", - "\n", - "def read_dataset():\n", - " \"\"\"Read whole data.\"\"\"\n", - " fetch_dataset()\n", - "\n", - " print(f\"Reading train data...\")\n", - " train_set = read_set('train')\n", - "\n", - " print(f\"Reading test data...\")\n", - " test_set = read_set('test')\n", - "\n", - " return train_set, test_set" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:28:55.311451Z", - "start_time": "2023-08-21T13:28:55.267212Z" + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def preprocess_dataset(train_set, test_set):\n", - " \"\"\"Unite train and test into common dataframe.\"\"\"\n", - " # Create a new target column and remove a former one from the train data.\n", - " print(\"Start data preprocessing...\")\n", - " train_set.pop('isFraud')\n", - " train_set['isTest'] = 0\n", - " test_set['isTest'] = 1\n", - "\n", - " # Preprocess categorical features.\n", - " n_train = train_set.shape[0]\n", - " for c in train_set.columns:\n", - " s = train_set[c]\n", - " if hasattr(s, 'cat'):\n", - " u = union_categoricals([train_set[c], test_set[c]], sort_categories=True)\n", - " train_set[c] = u[:n_train]\n", - " test_set[c] = u[n_train:]\n", - "\n", - " # Unite train and test data.\n", - " united = pd.concat([train_set, test_set])\n", - "\n", - " # Add additional features.\n", - " united['TimeInDay'] = united.TransactionDT % 86400\n", - " united['Cents'] = united.TransactionAmt % 1\n", - "\n", - " # Remove useless columns.\n", - " united.drop(\"TransactionDT\", axis=1, inplace=True)\n", - "\n", - " print(f\"Dataset merged and preprocessed! 
Resulted shape: {united.shape}\")\n", - "\n", - " return united" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "_kg_hide-input": true - }, - "outputs": [], - "source": [ - "united_dataset = preprocess_dataset(*read_dataset())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Train-test split" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:28:59.169388Z", - "start_time": "2023-08-21T13:28:59.146722Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Troubleshooting\n", + "\n", + "If you encounter a segmentation fault on macOS at any point during this tutorial, check: https://docs.giskard.ai/en/latest/contribute/dev-environment.html" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "X_train, X_test, y_train, y_test = train_test_split(united_dataset.drop(TARGET_COLUMN, axis=1),\n", - " united_dataset[TARGET_COLUMN], test_size=0.25)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap dataset with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:29:39.807910Z", - "start_time": "2023-08-21T13:29:39.507770Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Import libraries" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "raw_dataset = pd.concat([X_test, y_test], axis=1)\n", - "wrapped_dataset = Dataset(\n", - " df=raw_dataset, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=TARGET_COLUMN, # Ground truth variable.\n", - " name=\"fraud_detection_adversarial_dataset\", # Optional.\n", - " cat_columns=CATEGORICALS # List of categorical columns. Optional, but is a MUST if available. 
Inferred automatically if not.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Build estimator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Define parameters of an estimator.\n", - "ESTIMATOR_PARAMS = {\n", - " 'num_leaves': 64,\n", - " 'objective': 'binary',\n", - " 'min_data_in_leaf': 10,\n", - " 'learning_rate': 0.1,\n", - " 'feature_fraction': 0.5,\n", - " 'bagging_fraction': 0.9,\n", - " 'bagging_freq': 1,\n", - " 'max_cat_to_onehot': 128,\n", - " 'metric': 'auc',\n", - " 'n_jobs': -1,\n", - " 'seed': 42,\n", - " 'subsample_for_bin': united_dataset.shape[0]\n", - "}\n", - "\n", - "estimator = LGBMClassifier(**ESTIMATOR_PARAMS)\n", - "estimator.fit(X_train, y_train)\n", - "\n", - "train_metric = roc_auc_score(y_train, estimator.predict_proba(X_train)[:, 1].T)\n", - "test_metric = roc_auc_score(y_test, estimator.predict_proba(X_test)[:, 1].T)\n", - "\n", - "print(f\"Train ROC-AUC score: {train_metric:.2f}\")\n", - "print(f\"Test ROC-AUC score: {test_metric:.2f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap model with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:29:41.268764Z", - "start_time": "2023-08-21T13:29:41.245709Z" + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from lightgbm import LGBMClassifier\n", + "from pandas.api.types import union_categoricals\n", + "from sklearn.metrics import roc_auc_score\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "import giskard\n", + "from giskard import GiskardClient, testing, Dataset, Model" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def prediction_function(df: pd.DataFrame) -> np.ndarray:\n", - " return estimator.predict_proba(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "wrapped_model = Model(\n", - " model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"train_test_data_classifier\", # Optional.\n", - " classification_labels=[0, 1], # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=X_train.columns, # Default: all columns of your dataset.\n", - " # classification_threshold=0.5 # Default: 0.5.\n", - ")\n", - "\n", - "# Validate wrapped model.\n", - "wrapped_test_metric = roc_auc_score(y_test, wrapped_model.predict(wrapped_dataset).raw[:, 1].T)\n", - "print(f\"Wrapped Test ROC-AUC score: {wrapped_test_metric:.2f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan your model to find vulnerabilities\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, 
unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "results = giskard.scan(wrapped_model, wrapped_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:36:54.651751Z", - "start_time": "2023-08-21T13:36:44.742285Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Define constants" + ] }, - "collapsed": false - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:28:55.240118Z", + "start_time": "2023-08-21T13:28:55.183812Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# Constants.\n", + "TARGET_COLUMN = 'isTest'\n", + "IDX_LABEL = 'TransactionID'\n", + "\n", + "# Paths.\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/fraud_detection_classification_dataset/{}\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"fraud_detection_classification_dataset\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Dataset preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Load and preprocess data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:28:55.269180Z", + "start_time": "2023-08-21T13:28:55.205245Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", + " if not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " print(f\"Downloading data from {url}\")\n", + " urlretrieve(url, file)\n", + "\n", + " print(f\"Data was loaded!\")\n", + "\n", + "\n", + "def fetch_dataset():\n", + " files_to_fetch = [\"train_transaction.csv\", \"train_identity.csv\", \"test_transaction.csv\", \"test_identity.csv\"]\n", + " for file_name in files_to_fetch:\n", + " fetch_from_ftp(DATA_URL.format(file_name), DATA_PATH / file_name)\n", + "\n", + "\n", + "# Define data-types of transactions features.\n", + "DATA_TYPES_TRANSACTION = {\n", + " 'TransactionID': 'int32',\n", + " 'isFraud': 'int8',\n", + " 'TransactionDT': 'int32',\n", + " 'TransactionAmt': 'float32',\n", + " 'ProductCD': 'category',\n", + " 'card1': 'int16',\n", + " 'card2': 'float32',\n", + " 'card3': 'float32',\n", + " 'card4': 'category',\n", + " 'card5': 'float32',\n", + " 'card6': 'category',\n", + " 'addr1': 'float32',\n", + " 'addr2': 'float32',\n", + " 'dist1': 'float32',\n", + " 'dist2': 'float32',\n", + " 'P_emaildomain': 'category',\n", + " 'R_emaildomain': 'category',\n", + "}\n", + "\n", + "C_COLS = [f'C{i}' for i in range(1, 15)]\n", + "D_COLS = [f'D{i}' for i in range(1, 16)]\n", + "M_COLS = [f'M{i}' for i in range(1, 10)]\n", + "V_COLS = [f'V{i}' for i in range(1, 340)]\n", + "\n", + "DATA_TYPES_TRANSACTION.update((c, 'float32') for c in C_COLS)\n", + "DATA_TYPES_TRANSACTION.update((c, 'float32') for c in D_COLS)\n", + "DATA_TYPES_TRANSACTION.update((c, 'float32') for c in V_COLS)\n", + "DATA_TYPES_TRANSACTION.update((c, 
'category') for c in M_COLS)\n", + "\n", + "# Define datatypes of identity features.\n", + "DATA_TYPES_ID = {\n", + " 'TransactionID': 'int32',\n", + " 'DeviceType': 'category',\n", + " 'DeviceInfo': 'category',\n", + "}\n", + "\n", + "ID_COLS = [f'id_{i:02d}' for i in range(1, 39)]\n", + "ID_CATS = [\n", + " 'id_12', 'id_15', 'id_16', 'id_23', 'id_27', 'id_28', 'id_29', 'id_30',\n", + " 'id_31', 'id_33', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38'\n", + "]\n", + "\n", + "DATA_TYPES_ID.update(((c, 'float32') for c in ID_COLS))\n", + "DATA_TYPES_ID.update(((c, 'category') for c in ID_CATS))\n", + "\n", + "# Define list of all categorical features.\n", + "CATEGORICALS = [f_name for (f_name, f_type) in dict(DATA_TYPES_TRANSACTION, **DATA_TYPES_ID).items() if\n", + " f_type == \"category\"]\n", + "\n", + "\n", + "def read_set(_type):\n", + " \"\"\"Read both transactions and identity data.\"\"\"\n", + " print(f\"Reading transactions data...\")\n", + " _df = pd.read_csv(os.path.join(DATA_PATH, f'{_type}_transaction.csv'),\n", + " index_col=IDX_LABEL, dtype=DATA_TYPES_TRANSACTION, nrows=250)\n", + "\n", + " print(f\"Reading identity data...\")\n", + " _df = _df.join(pd.read_csv(os.path.join(DATA_PATH, f'{_type}_identity.csv'),\n", + " index_col=IDX_LABEL, dtype=DATA_TYPES_ID))\n", + " return _df\n", + "\n", + "\n", + "def read_dataset():\n", + " \"\"\"Read whole data.\"\"\"\n", + " fetch_dataset()\n", + "\n", + " print(f\"Reading train data...\")\n", + " train_set = read_set('train')\n", + "\n", + " print(f\"Reading test data...\")\n", + " test_set = read_set('test')\n", + "\n", + " return train_set, test_set" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:28:55.311451Z", + "start_time": "2023-08-21T13:28:55.267212Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def preprocess_dataset(train_set, test_set):\n", + " \"\"\"Unite train and test into common dataframe.\"\"\"\n", + " # Create a new target column and remove a former one from the train data.\n", + " print(\"Start data preprocessing...\")\n", + " train_set.pop('isFraud')\n", + " train_set['isTest'] = 0\n", + " test_set['isTest'] = 1\n", + "\n", + " # Preprocess categorical features.\n", + " n_train = train_set.shape[0]\n", + " for c in train_set.columns:\n", + " s = train_set[c]\n", + " if hasattr(s, 'cat'):\n", + " u = union_categoricals([train_set[c], test_set[c]], sort_categories=True)\n", + " train_set[c] = u[:n_train]\n", + " test_set[c] = u[n_train:]\n", + "\n", + " # Unite train and test data.\n", + " united = pd.concat([train_set, test_set])\n", + "\n", + " # Add additional features.\n", + " united['TimeInDay'] = united.TransactionDT % 86400\n", + " united['Cents'] = united.TransactionAmt % 1\n", + "\n", + " # Remove useless columns.\n", + " united.drop(\"TransactionDT\", axis=1, inplace=True)\n", + "\n", + " print(f\"Dataset merged and preprocessed! 
Resulted shape: {united.shape}\")\n", + "\n", + " return united" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_kg_hide-input": true + }, + "outputs": [], + "source": [ + "united_dataset = preprocess_dataset(*read_dataset())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Train-test split" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:28:59.169388Z", + "start_time": "2023-08-21T13:28:59.146722Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(united_dataset.drop(TARGET_COLUMN, axis=1),\n", + " united_dataset[TARGET_COLUMN], test_size=0.25)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap dataset with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:29:39.807910Z", + "start_time": "2023-08-21T13:29:39.507770Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "raw_dataset = pd.concat([X_test, y_test], axis=1)\n", + "wrapped_dataset = Dataset(\n", + " df=raw_dataset, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=TARGET_COLUMN, # Ground truth variable.\n", + " name=\"fraud_detection_adversarial_dataset\", # Optional.\n", + " cat_columns=CATEGORICALS # List of categorical columns. Optional, but is a MUST if available. Inferred automatically if not.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Build estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Define parameters of an estimator.\n", + "ESTIMATOR_PARAMS = {\n", + " 'num_leaves': 64,\n", + " 'objective': 'binary',\n", + " 'min_data_in_leaf': 10,\n", + " 'learning_rate': 0.1,\n", + " 'feature_fraction': 0.5,\n", + " 'bagging_fraction': 0.9,\n", + " 'bagging_freq': 1,\n", + " 'max_cat_to_onehot': 128,\n", + " 'metric': 'auc',\n", + " 'n_jobs': -1,\n", + " 'seed': 42,\n", + " 'subsample_for_bin': united_dataset.shape[0]\n", + "}\n", + "\n", + "estimator = LGBMClassifier(**ESTIMATOR_PARAMS)\n", + "estimator.fit(X_train, y_train)\n", + "\n", + "train_metric = roc_auc_score(y_train, estimator.predict_proba(X_train)[:, 1].T)\n", + "test_metric = roc_auc_score(y_test, estimator.predict_proba(X_test)[:, 1].T)\n", + "\n", + "print(f\"Train ROC-AUC score: {train_metric:.2f}\")\n", + "print(f\"Test ROC-AUC score: {test_metric:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap model with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:29:41.268764Z", + "start_time": "2023-08-21T13:29:41.245709Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def prediction_function(df: pd.DataFrame) -> np.ndarray:\n", + " return estimator.predict_proba(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "wrapped_model = Model(\n", + " 
model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"train_test_data_classifier\", # Optional.\n", + " classification_labels=[0, 1], # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=X_train.columns, # Default: all columns of your dataset.\n", + " # classification_threshold=0.5 # Default: 0.5.\n", + ")\n", + "\n", + "# Validate wrapped model.\n", + "wrapped_test_metric = roc_auc_score(y_test, wrapped_model.predict(wrapped_dataset).raw[:, 1].T)\n", + "print(f\"Wrapped Test ROC-AUC score: {wrapped_test_metric:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Scan your model to find vulnerabilities\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "results = giskard.scan(wrapped_model, wrapped_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:36:54.651751Z", + "start_time": "2023-08-21T13:36:44.742285Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_dataset, threshold=0.7)).run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "* Compare models to decide which model to promote\n", + "* Debug your tests to diagnose the issues\n", + "* Create more domain-specific tests that are integrating business feedback\n", + "* Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_dataset, threshold=0.7)).run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/insurance_prediction_lgbm.ipynb b/python-client/docs/reference/notebooks/insurance_prediction_lgbm.ipynb index e019ae79d5..c4a6c76759 100644 --- a/python-client/docs/reference/notebooks/insurance_prediction_lgbm.ipynb +++ b/python-client/docs/reference/notebooks/insurance_prediction_lgbm.ipynb @@ -1,1946 +1,1946 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "87600f5fd00ccd99", - "metadata": { - "collapsed": false - }, - "source": [ - "# Insurance charges prediction [LightGBM]\n", - "* Regression to predict the insurance charges based on medical and social data.\n", - "* Dataset: \n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a regression model with medical and social data in a tabular format and insurance charges as numerical target variable. Feel free to use your own model.\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - "* Compare models to decide which one to promote\n", - "* Debug your tests to diagnose issues\n", - "* Share your results and collect business feedback from your team" - ] - }, - { - "cell_type": "markdown", - "id": "c1c0ae5d3f2ab62c", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install necessary python packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "797dd8f836bd3a03", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-01T15:21:01.837060Z", - "start_time": "2023-08-01T15:21:01.831520Z" + "cells": [ + { + "cell_type": "markdown", + "id": "87600f5fd00ccd99", + "metadata": { + "collapsed": false + }, + "source": [ + "# Insurance charges prediction [LightGBM]\n", + "* Regression to predict the insurance charges based on medical and social data.\n", + "* Dataset: \n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a regression model with medical and social data in a tabular format and insurance charges as numerical target variable. 
Feel free to use your own model.\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + "* Compare models to decide which one to promote\n", + "* Debug your tests to diagnose issues\n", + "* Share your results and collect business feedback from your team" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "!pip install \"giskard>=2.0.0b\" -U\n", - "!pip install lightgbm" - ] - }, - { - "cell_type": "markdown", - "id": "e37ff3bea8055af3", - "metadata": { - "collapsed": false - }, - "source": [ - "## Troubleshooting\n", - "\n", - "If you encounter a segmentation fault on macOS at any point during this tutorial, check: https://docs.giskard.ai/en/latest/contribute/dev-environment.html" - ] - }, - { - "cell_type": "markdown", - "id": "934e834d6dd909af", - "metadata": { - "collapsed": false - }, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7024ed0a", - "metadata": { - "hidden": true - }, - "outputs": [], - "source": [ - "import os\n", - "import warnings\n", - "from absl import logging\n", - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "from lightgbm import LGBMRegressor\n", - "from sklearn.metrics import r2_score\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n", - "\n", - "from giskard import GiskardClient, testing, Dataset, Model, scan\n", - "\n", - "# Notebook-level settings.\n", - "logging.set_verbosity(logging.ERROR)\n", - "warnings.filterwarnings(\"ignore\", message=r\"Passing\", category=FutureWarning)" - ] - }, - { - "cell_type": "markdown", - "id": "ac16a52c0335a961", - "metadata": { - "collapsed": false - }, - "source": [ - "## Define constants" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "434314c0d4cf31fb", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:40:32.315001Z", - "start_time": "2023-08-21T13:40:32.299814Z" + { + "cell_type": "markdown", + "id": "c1c0ae5d3f2ab62c", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install necessary python packages" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# Constants.\n", - "NUMERICAL_COLS = [\"bmi\", \"age\", \"children\"]\n", - "CATEGORICAL_COLS = [\"sex\", \"smoker\", \"region\"]\n", - "\n", - "# Paths.\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/insurance_prediction_dataset/us_health_insurance_dataset.csv\"\n", - "DATA_PATH = Path.home() / \".giskard\" / \"insurance_prediction_dataset\" / \"us_health_insurance_dataset.csv\"" - ] - }, - { - "cell_type": "markdown", - "id": "ed10d7dd4bbca1e0", - "metadata": { - "collapsed": false - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "id": "a146b1f98d0a8e3f", - "metadata": { - "collapsed": false - }, - "source": [ - "### Load data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "fe19935c186fd365", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:40:32.360012Z", - "start_time": "2023-08-21T13:40:32.309808Z" + { + "cell_type": "code", + "execution_count": null, + "id": 
"797dd8f836bd3a03", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-01T15:21:01.837060Z", + "start_time": "2023-08-01T15:21:01.831520Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "!pip install \"giskard>=2.0.0b\" -U\n", + "!pip install lightgbm" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def fetch_from_ftp(url: str, file: Path) -> None:\n", - " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " print(f\"Downloading data from {url}\")\n", - " urlretrieve(url, file)\n", - "\n", - " print(f\"Data was loaded!\")\n", - "\n", - "\n", - "def download_data(**kwargs) -> pd.DataFrame:\n", - " \"\"\"Download the dataset using URL.\"\"\"\n", - " fetch_from_ftp(DATA_URL, DATA_PATH)\n", - " _df = pd.read_csv(DATA_PATH, **kwargs)\n", - " return _df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6832a4bf", - "metadata": {}, - "outputs": [], - "source": [ - "df = download_data()" - ] - }, - { - "cell_type": "markdown", - "id": "8213c9a5b37b94ce", - "metadata": { - "collapsed": false - }, - "source": [ - "### Train-test split" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "2a5351b1c97f2a31", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:40:34.772074Z", - "start_time": "2023-08-21T13:40:34.759052Z" + { + "cell_type": "markdown", + "id": "e37ff3bea8055af3", + "metadata": { + "collapsed": false + }, + "source": [ + "## Troubleshooting\n", + "\n", + "If you encounter a segmentation fault on macOS at any point during this tutorial, check: https://docs.giskard.ai/en/latest/contribute/dev-environment.html" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=[\"charges\"]), df.charges, random_state=0)" - ] - }, - { - "cell_type": "markdown", - "id": "121478a9aa05b571", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap dataset with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "4d95d45185280742", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:40:35.269122Z", - "start_time": "2023-08-21T13:40:35.247828Z" + { + "cell_type": "markdown", + "id": "934e834d6dd909af", + "metadata": { + "collapsed": false + }, + "source": [ + "## Import libraries" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "raw_data = pd.concat([X_test, y_test], axis=1)\n", - "wrapped_data = Dataset(\n", - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=\"charges\", # Ground truth variable.\n", - " name=\"insurance dataset\", # Optional.\n", - " cat_columns=CATEGORICAL_COLS # List of categorical columns. Optional, but is a MUST if available. 
Inferred automatically if not.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "81b1ef1f34961763", - "metadata": { - "collapsed": false - }, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "id": "8a0bf6e9a7a42831", - "metadata": { - "collapsed": false - }, - "source": [ - "### Define preprocessing pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "64d42c05c8107e59", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:43:40.562654Z", - "start_time": "2023-08-21T13:43:40.546541Z" + { + "cell_type": "code", + "execution_count": null, + "id": "7024ed0a", + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "from absl import logging\n", + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "from lightgbm import LGBMRegressor\n", + "from sklearn.metrics import r2_score\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n", + "\n", + "from giskard import GiskardClient, testing, Dataset, Model, scan\n", + "\n", + "# Notebook-level settings.\n", + "logging.set_verbosity(logging.ERROR)\n", + "warnings.filterwarnings(\"ignore\", message=r\"Passing\", category=FutureWarning)" + ] + }, + { + "cell_type": "markdown", + "id": "ac16a52c0335a961", + "metadata": { + "collapsed": false + }, + "source": [ + "## Define constants" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "434314c0d4cf31fb", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:40:32.315001Z", + "start_time": "2023-08-21T13:40:32.299814Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# Constants.\n", + "NUMERICAL_COLS = [\"bmi\", \"age\", \"children\"]\n", + "CATEGORICAL_COLS = [\"sex\", \"smoker\", \"region\"]\n", + "\n", + "# Paths.\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/insurance_prediction_dataset/us_health_insurance_dataset.csv\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"insurance_prediction_dataset\" / \"us_health_insurance_dataset.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "ed10d7dd4bbca1e0", + "metadata": { + "collapsed": false + }, + "source": [ + "## Dataset preparation" + ] + }, + { + "cell_type": "markdown", + "id": "a146b1f98d0a8e3f", + "metadata": { + "collapsed": false + }, + "source": [ + "### Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fe19935c186fd365", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:40:32.360012Z", + "start_time": "2023-08-21T13:40:32.309808Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", + " if not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " print(f\"Downloading data from {url}\")\n", + " urlretrieve(url, file)\n", + "\n", + " print(f\"Data was loaded!\")\n", + "\n", + "\n", + "def download_data(**kwargs) -> pd.DataFrame:\n", + " \"\"\"Download the dataset using URL.\"\"\"\n", + " fetch_from_ftp(DATA_URL, DATA_PATH)\n", + " _df = pd.read_csv(DATA_PATH, **kwargs)\n", + " return _df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6832a4bf", + "metadata": 
{}, + "outputs": [], + "source": [ + "df = download_data()" + ] + }, + { + "cell_type": "markdown", + "id": "8213c9a5b37b94ce", + "metadata": { + "collapsed": false + }, + "source": [ + "### Train-test split" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2a5351b1c97f2a31", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:40:34.772074Z", + "start_time": "2023-08-21T13:40:34.759052Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=[\"charges\"]), df.charges, random_state=0)" + ] + }, + { + "cell_type": "markdown", + "id": "121478a9aa05b571", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap dataset with Giskard" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "preprocessor = ColumnTransformer(transformers=[\n", - " (\"scaler\", StandardScaler(), NUMERICAL_COLS),\n", - " (\"one_hot_encoder\", OneHotEncoder(handle_unknown=\"ignore\", sparse=False), CATEGORICAL_COLS),\n", - "])" - ] - }, - { - "cell_type": "markdown", - "id": "495a8040bc6735f3", - "metadata": { - "collapsed": false - }, - "source": [ - "### Build estimator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e6332de6", - "metadata": {}, - "outputs": [], - "source": [ - "pipeline = Pipeline(steps=[\n", - " (\"preprocessor\", preprocessor),\n", - " (\"regressor\", LGBMRegressor(n_estimators=30))\n", - "])\n", - "\n", - "pipeline.fit(X_train, y_train)\n", - "\n", - "y_train_pred = pipeline.predict(X_train)\n", - "y_test_pred = pipeline.predict(X_test)\n", - "\n", - "train_r2 = r2_score(y_train, y_train_pred)\n", - "test_r2 = r2_score(y_test, y_test_pred)\n", - "\n", - "print(f\"Train R2-score: {train_r2:.2f}\")\n", - "print(f\"Test R2-score: {test_r2:.2f}\")" - ] - }, - { - "cell_type": "markdown", - "id": "078e028d", - "metadata": {}, - "source": [ - "### Wrap model with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e13357c", - "metadata": {}, - "outputs": [], - "source": [ - "wrapped_model = Model(\n", - " model=pipeline.predict, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"regression\", # Either regression, classification or text_generation.\n", - " name=\"insurance model\", # Optional.\n", - " feature_names=X_train.columns # Default: all columns of your dataset.\n", - ")\n", - "\n", - "# Validate wrapped model.\n", - "wrapped_predict = wrapped_model.predict(wrapped_data)\n", - "wrapped_test_metric = r2_score(y_test, wrapped_predict.prediction)\n", - "\n", - "print(f'Wrapped Test R2-score: {wrapped_test_metric:.2f}')" - ] - }, - { - "cell_type": "markdown", - "id": "4439379d", - "metadata": {}, - "source": [ - "## Scan your model to find vulnerabilities\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e188fda7", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "results = scan(model=wrapped_model, dataset=wrapped_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "a52cd1f1", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:46:41.690516Z", - "start_time": "2023-08-21T13:46:41.133698Z" - } - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "code", + "execution_count": 6, + "id": "4d95d45185280742", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:40:35.269122Z", + "start_time": "2023-08-21T13:40:35.247828Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "raw_data = pd.concat([X_test, y_test], axis=1)\n", + "wrapped_data = Dataset(\n", + " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=\"charges\", # Ground truth variable.\n", + " name=\"insurance dataset\", # Optional.\n", + " cat_columns=CATEGORICAL_COLS # List of categorical columns. Optional, but is a MUST if available. Inferred automatically if not.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "81b1ef1f34961763", + "metadata": { + "collapsed": false + }, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "id": "8a0bf6e9a7a42831", + "metadata": { + "collapsed": false + }, + "source": [ + "### Define preprocessing pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "64d42c05c8107e59", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:43:40.562654Z", + "start_time": "2023-08-21T13:43:40.546541Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "preprocessor = ColumnTransformer(transformers=[\n", + " (\"scaler\", StandardScaler(), NUMERICAL_COLS),\n", + " (\"one_hot_encoder\", OneHotEncoder(handle_unknown=\"ignore\", sparse=False), CATEGORICAL_COLS),\n", + "])" + ] + }, + { + "cell_type": "markdown", + "id": "495a8040bc6735f3", + "metadata": { + "collapsed": false + }, + "source": [ + "### Build estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6332de6", + "metadata": {}, + "outputs": [], + "source": [ + "pipeline = Pipeline(steps=[\n", + " (\"preprocessor\", preprocessor),\n", + " (\"regressor\", LGBMRegressor(n_estimators=30))\n", + "])\n", + "\n", + "pipeline.fit(X_train, y_train)\n", + "\n", + "y_train_pred = pipeline.predict(X_train)\n", + "y_test_pred = pipeline.predict(X_test)\n", + "\n", + "train_r2 = r2_score(y_train, y_train_pred)\n", + "test_r2 = r2_score(y_test, y_test_pred)\n", + "\n", + "print(f\"Train R2-score: {train_r2:.2f}\")\n", + "print(f\"Test R2-score: {test_r2:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "078e028d", + "metadata": {}, + "source": [ + "### Wrap model with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e13357c", + "metadata": {}, + "outputs": [], + "source": [ + "wrapped_model = Model(\n", + " model=pipeline.predict, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"regression\", # Either regression, classification or text_generation.\n", + " name=\"insurance model\", # Optional.\n", + " feature_names=X_train.columns # Default: all columns of your dataset.\n", + ")\n", + "\n", + "# Validate wrapped model.\n", + 
"wrapped_predict = wrapped_model.predict(wrapped_data)\n", + "wrapped_test_metric = r2_score(y_test, wrapped_predict.prediction)\n", + "\n", + "print(f'Wrapped Test R2-score: {wrapped_test_metric:.2f}')" + ] + }, + { + "cell_type": "markdown", + "id": "4439379d", + "metadata": {}, + "source": [ + "## Scan your model to find vulnerabilities\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e188fda7", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "results = scan(model=wrapped_model, dataset=wrapped_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a52cd1f1", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:46:41.690516Z", + "start_time": "2023-08-21T13:46:41.133698Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ] + }, + { + "cell_type": "markdown", + "id": "cf658ece", + "metadata": {}, + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19c8c07c", + "metadata": {}, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"Test suite\")\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "id": "52bba57047934b96", + "metadata": { + "collapsed": false + }, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_rmse) that checks if the test RMSE score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4d206ac223e83c4", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_rmse(model=wrapped_model, dataset=wrapped_data, threshold=10.0)).run()" + ] + }, + { + "cell_type": "markdown", + "id": "4a31adf6", + "metadata": {}, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "* Compare models to decide which model to promote\n", + "* Debug your tests to diagnose the issues\n", + "* Create more domain-specific tests that are integrating business feedback\n", + "* Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e139194b2d1076", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "id": "341a0693c256f36d", + "metadata": { + "collapsed": false + }, + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "id": "cf658ece", - "metadata": {}, - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "19c8c07c", - "metadata": {}, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"Test suite\")\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "id": "52bba57047934b96", - "metadata": { - "collapsed": false - }, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_rmse) that checks if the test RMSE score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4d206ac223e83c4", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_rmse(model=wrapped_model, dataset=wrapped_data, threshold=10.0)).run()" - ] - }, - { - "cell_type": "markdown", - "id": "4a31adf6", - "metadata": {}, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3e139194b2d1076", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "id": "341a0693c256f36d", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - }, - "toc": { - "base_numbering": "0", - "nav_menu": {}, - "number_sections": true, - "sideBar": false, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "788px", - "left": "10px", - "top": "150px", - "width": "341px" - }, - "toc_section_display": true, - "toc_window_display": true + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "toc": { + "base_numbering": "0", + "nav_menu": {}, + "number_sections": true, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "788px", + "left": "10px", + "top": "150px", + "width": "341px" + }, + "toc_section_display": true, + "toc_window_display": true + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb b/python-client/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb index 3c12395073..f4c4862a40 100644 --- a/python-client/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb @@ -1,4354 +1,4354 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "4aca7fc853806c93", - "metadata": { - "collapsed": false - }, - "source": [ - "# Medical transcript classification [sklearn]\n", - "* Multiclass classification of medical transcript.\n", - "* Reference notebook: \n", - "* Dataset: \n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a Random Forest classification model with the medical transcript dataset. 
Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ] - }, - { - "cell_type": "markdown", - "id": "4e258515e57c6fb4", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3b4090a3da481df6", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "markdown", - "id": "c366c8f9bfc19e0e", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install necessary dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b2456f963f403075", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "!pip install nltk" - ] - }, - { - "cell_type": "markdown", - "id": "1db9217677bba1d3", - "metadata": { - "collapsed": false - }, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "14e64fb17dd952c", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import os\n", - "import string\n", - "from pathlib import Path\n", - "from typing import Iterable\n", - "from urllib.request import urlretrieve\n", - "\n", - "import nltk\n", - "import pandas as pd\n", - "from nltk.corpus import stopwords\n", - "from nltk.stem.snowball import SnowballStemmer\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.feature_extraction.text import CountVectorizer\n", - "from sklearn.metrics import classification_report\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import FunctionTransformer\n", - "\n", - "import giskard\n", - "from giskard import Dataset, Model, GiskardClient, testing" - ] - }, - { - "cell_type": "markdown", - "id": "fab2da98face6468", - "metadata": { - "collapsed": false - }, - "source": [ - "## Download NLTK stopwords corpus" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5597019d912e3761", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Download list of english stopwords.\n", - "nltk.download('stopwords')" - ] - }, - { - "cell_type": "markdown", - "id": "76c9422db247a11f", - "metadata": { - "collapsed": false - }, - "source": [ - "## Define constants" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "eb6ddf97e5bfaa17", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:54:54.189779Z", - "start_time": "2023-08-21T13:54:54.136775Z" + "cells": [ + { + "cell_type": "markdown", + "id": "4aca7fc853806c93", + "metadata": { + "collapsed": false + }, + "source": [ + "# Medical transcript classification [sklearn]\n", + "* Multiclass classification of medical transcript.\n", + "* Reference notebook: \n", + "* Dataset: \n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a Random Forest classification model with the medical transcript dataset. 
Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# Constants.\n", - "LABELS_LIST = [\n", - " 'Neurosurgery',\n", - " 'ENT - Otolaryngology',\n", - " 'Discharge Summary',\n", - " 'General Medicine',\n", - " 'Gastroenterology',\n", - " 'Neurology',\n", - " 'SOAP / Chart / Progress Notes',\n", - " 'Obstetrics / Gynecology',\n", - " 'Urology'\n", - "]\n", - "\n", - "TEXT_COLUMN_NAME = \"transcription\"\n", - "TARGET_COLUMN_NAME = \"medical_specialty\"\n", - "\n", - "RANDOM_SEED = 8888\n", - "\n", - "# Data.\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/medical_transcript_classification_dataset/mtsamples.csv\"\n", - "DATA_PATH = Path.home() / \".giskard\" / \"medical_transcript_classification_dataset\" / \"mtsamples.csv\"" - ] - }, - { - "cell_type": "markdown", - "id": "4dc59c5d5e7f1b3a", - "metadata": { - "collapsed": false - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "id": "ec22e5ac312345e0", - "metadata": { - "collapsed": false - }, - "source": [ - "### Load data" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "2016f55da2fb2636", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:54:54.204610Z", - "start_time": "2023-08-21T13:54:54.156914Z" + { + "cell_type": "markdown", + "id": "4e258515e57c6fb4", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install Giskard" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def fetch_from_ftp(url: str, file: Path) -> None:\n", - " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " print(f\"Downloading data from {url}\")\n", - " urlretrieve(url, file)\n", - "\n", - " print(f\"Data was loaded!\")\n", - "\n", - "\n", - "def load_data() -> pd.DataFrame:\n", - " \"\"\"Load and initially preprocess data.\"\"\"\n", - " fetch_from_ftp(DATA_URL, DATA_PATH)\n", - "\n", - " df = pd.read_csv(DATA_PATH)\n", - "\n", - " # Drop useless columns.\n", - " df = df.drop(columns=['Unnamed: 0', \"description\", \"sample_name\", \"keywords\"])\n", - "\n", - " # Trim text.\n", - " df = df.apply(lambda x: x.str.strip())\n", - "\n", - " # Filter samples by label.\n", - " df = df[df[TARGET_COLUMN_NAME].isin(LABELS_LIST)]\n", - "\n", - " # Drop rows with no transcript.\n", - " df = df[df[TEXT_COLUMN_NAME].notna()]\n", - "\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "1ae698a321b92150", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:54:54.510056Z", - "start_time": "2023-08-21T13:54:54.169274Z" + { + "cell_type": "code", + "execution_count": null, + "id": "3b4090a3da481df6", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ] }, - "collapsed": false - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data was loaded!\n" - ] - } - ], - 
"source": [ - "transcript_df = load_data()" - ] - }, - { - "cell_type": "markdown", - "id": "dff3f8d6ee61422c", - "metadata": { - "collapsed": false - }, - "source": [ - "### Train-test split" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ef066b868f02dea0", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:54:54.593655Z", - "start_time": "2023-08-21T13:54:54.399254Z" + "cell_type": "markdown", + "id": "c366c8f9bfc19e0e", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install necessary dependencies" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "X_train, X_test, y_train, y_test = train_test_split(transcript_df[[TEXT_COLUMN_NAME]],\n", - " transcript_df[TARGET_COLUMN_NAME],\n", - " random_state=RANDOM_SEED)" - ] - }, - { - "cell_type": "markdown", - "id": "80ecb6b9b192d5d1", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap dataset with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "2eadc4944d498729", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:58:17.344335Z", - "start_time": "2023-08-21T13:58:17.308020Z" + { + "cell_type": "code", + "execution_count": null, + "id": "b2456f963f403075", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!pip install nltk" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "raw_data = pd.concat([X_test, y_test], axis=1)\n", - "wrapped_data = Dataset(\n", - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " name=\"medical_transcript_dataset\", # Ground truth variable.\n", - " target=TARGET_COLUMN_NAME # Optional.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "f6ca9e04617e8c31", - "metadata": { - "collapsed": false - }, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "id": "31c55dffcc266034", - "metadata": { - "collapsed": false - }, - "source": [ - "### Define preprocessing steps" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "cc4c51a3519004b1", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T13:58:17.953060Z", - "start_time": "2023-08-21T13:58:17.936320Z" + { + "cell_type": "markdown", + "id": "1db9217677bba1d3", + "metadata": { + "collapsed": false + }, + "source": [ + "## Import libraries" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "stemmer = SnowballStemmer(\"english\")\n", - "stop_words = stopwords.words(\"english\")\n", - "\n", - "\n", - "def preprocess_text(df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Preprocess text.\"\"\"\n", - " # Lower.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: x.lower())\n", - "\n", - " # Remove punctuation.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: x.translate(str.maketrans('', '', string.punctuation)))\n", - "\n", - " # Tokenize.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: x.split())\n", - "\n", - " # Stem.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: [stemmer.stem(word) for word in x])\n", - "\n", - " # Remove stop-words.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(\n", - " lambda x: ' '.join([word for word in x if word not in stop_words]))\n", - "\n", - " return df\n", - "\n", - "\n", - "def adapt_vectorizer_input(df: pd.DataFrame) -> Iterable:\n", - " \"\"\"Adapt input for the vectorizers.\n", - "\n", - " The problem is that 
vectorizers accept iterable, not DataFrame, but Series. Thus, we need to ravel dataframe with text have input single dimension.\n", - " Issue reference: https://stackoverflow.com/questions/50665240/valueerror-found-input-variables-with-inconsistent-numbers-of-samples-1-3185\"\"\"\n", - "\n", - " df = df.iloc[:, 0]\n", - " return df\n", - "\n", - "\n", - "text_preprocessor = FunctionTransformer(preprocess_text)\n", - "vectorizer_input_adapter = FunctionTransformer(adapt_vectorizer_input)" - ] - }, - { - "cell_type": "markdown", - "id": "59100c101bc2ae30", - "metadata": { - "collapsed": false - }, - "source": [ - "### Build estimator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab98591d4cec24e3", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pipeline = Pipeline(steps=[\n", - " (\"text_preprocessor\", text_preprocessor),\n", - " (\"vectorizer_input_adapter\", vectorizer_input_adapter),\n", - " (\"vectorizer\", CountVectorizer(ngram_range=(1, 1))),\n", - " (\"estimator\", RandomForestClassifier(random_state=RANDOM_SEED))\n", - "])\n", - "\n", - "pipeline.fit(X_train, y_train)\n", - "y_pred = pipeline.predict(X_test)\n", - "\n", - "print(classification_report(y_test, y_pred))" - ] - }, - { - "cell_type": "markdown", - "id": "6447dbf318e87723", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap model with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "faf54fbaf32cca34", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "wrapped_model = Model(\n", - " model=pipeline.predict_proba, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"medical_transcript_classification\", # Optional.\n", - " classification_labels=pipeline.classes_, # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=[TEXT_COLUMN_NAME] # Default: all columns of your dataset.\n", - ")\n", - "\n", - "# Validate wrapped model and data.\n", - "print(classification_report(y_test, pipeline.classes_[wrapped_model.predict(wrapped_data).raw_prediction]))" - ] - }, - { - "cell_type": "markdown", - "id": "2a88e74a9f682208", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan your model to find vulnerabilities\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67f5f61eb78b32be", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "results = giskard.scan(wrapped_model, wrapped_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "eb4a2acdff290603", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:06:44.568514Z", - "start_time": "2023-08-21T14:06:43.148630Z" + { + "cell_type": "code", + "execution_count": null, + "id": "14e64fb17dd952c", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import os\n", + "import string\n", + "from pathlib import Path\n", + "from typing import Iterable\n", + "from urllib.request import urlretrieve\n", + "\n", + "import nltk\n", + "import pandas as pd\n", + "from nltk.corpus import stopwords\n", + "from nltk.stem.snowball import SnowballStemmer\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import FunctionTransformer\n", + "\n", + "import giskard\n", + "from giskard import Dataset, Model, GiskardClient, testing" + ] + }, + { + "cell_type": "markdown", + "id": "fab2da98face6468", + "metadata": { + "collapsed": false + }, + "source": [ + "## Download NLTK stopwords corpus" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5597019d912e3761", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Download list of english stopwords.\n", + "nltk.download('stopwords')" + ] + }, + { + "cell_type": "markdown", + "id": "76c9422db247a11f", + "metadata": { + "collapsed": false + }, + "source": [ + "## Define constants" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "eb6ddf97e5bfaa17", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:54:54.189779Z", + "start_time": "2023-08-21T13:54:54.136775Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# Constants.\n", + "LABELS_LIST = [\n", + " 'Neurosurgery',\n", + " 'ENT - Otolaryngology',\n", + " 'Discharge Summary',\n", + " 'General Medicine',\n", + " 'Gastroenterology',\n", + " 'Neurology',\n", + " 'SOAP / Chart / Progress Notes',\n", + " 'Obstetrics / Gynecology',\n", + " 'Urology'\n", + "]\n", + "\n", + "TEXT_COLUMN_NAME = \"transcription\"\n", + "TARGET_COLUMN_NAME = \"medical_specialty\"\n", + "\n", + "RANDOM_SEED = 8888\n", + "\n", + "# Data.\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/medical_transcript_classification_dataset/mtsamples.csv\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"medical_transcript_classification_dataset\" / \"mtsamples.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "4dc59c5d5e7f1b3a", + "metadata": { + "collapsed": false + }, + "source": [ + "## Dataset preparation" + ] + }, + { + "cell_type": "markdown", + "id": "ec22e5ac312345e0", + "metadata": { + "collapsed": false + }, + "source": [ + "### Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2016f55da2fb2636", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:54:54.204610Z", + "start_time": "2023-08-21T13:54:54.156914Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", + " if 
not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " print(f\"Downloading data from {url}\")\n", + " urlretrieve(url, file)\n", + "\n", + " print(f\"Data was loaded!\")\n", + "\n", + "\n", + "def load_data() -> pd.DataFrame:\n", + " \"\"\"Load and initially preprocess data.\"\"\"\n", + " fetch_from_ftp(DATA_URL, DATA_PATH)\n", + "\n", + " df = pd.read_csv(DATA_PATH)\n", + "\n", + " # Drop useless columns.\n", + " df = df.drop(columns=['Unnamed: 0', \"description\", \"sample_name\", \"keywords\"])\n", + "\n", + " # Trim text.\n", + " df = df.apply(lambda x: x.str.strip())\n", + "\n", + " # Filter samples by label.\n", + " df = df[df[TARGET_COLUMN_NAME].isin(LABELS_LIST)]\n", + "\n", + " # Drop rows with no transcript.\n", + " df = df[df[TEXT_COLUMN_NAME].notna()]\n", + "\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1ae698a321b92150", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:54:54.510056Z", + "start_time": "2023-08-21T13:54:54.169274Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data was loaded!\n" + ] + } + ], + "source": [ + "transcript_df = load_data()" + ] + }, + { + "cell_type": "markdown", + "id": "dff3f8d6ee61422c", + "metadata": { + "collapsed": false + }, + "source": [ + "### Train-test split" + ] }, - "collapsed": false - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "code", + "execution_count": 6, + "id": "ef066b868f02dea0", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:54:54.593655Z", + "start_time": "2023-08-21T13:54:54.399254Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(transcript_df[[TEXT_COLUMN_NAME]],\n", + " transcript_df[TARGET_COLUMN_NAME],\n", + " random_state=RANDOM_SEED)" + ] + }, + { + "cell_type": "markdown", + "id": "80ecb6b9b192d5d1", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap dataset with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "2eadc4944d498729", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:58:17.344335Z", + "start_time": "2023-08-21T13:58:17.308020Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "raw_data = pd.concat([X_test, y_test], axis=1)\n", + "wrapped_data = Dataset(\n", + " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " name=\"medical_transcript_dataset\", # Ground truth variable.\n", + " target=TARGET_COLUMN_NAME # Optional.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f6ca9e04617e8c31", + "metadata": { + "collapsed": false + }, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "id": "31c55dffcc266034", + "metadata": { + "collapsed": false + }, + "source": [ + "### Define preprocessing steps" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "cc4c51a3519004b1", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T13:58:17.953060Z", + "start_time": "2023-08-21T13:58:17.936320Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "stemmer = SnowballStemmer(\"english\")\n", + "stop_words = stopwords.words(\"english\")\n", + "\n", + "\n", + "def preprocess_text(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"Preprocess text.\"\"\"\n", + " # 
Lower.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: x.lower())\n", + "\n", + " # Remove punctuation.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: x.translate(str.maketrans('', '', string.punctuation)))\n", + "\n", + " # Tokenize.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: x.split())\n", + "\n", + " # Stem.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: [stemmer.stem(word) for word in x])\n", + "\n", + " # Remove stop-words.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(\n", + " lambda x: ' '.join([word for word in x if word not in stop_words]))\n", + "\n", + " return df\n", + "\n", + "\n", + "def adapt_vectorizer_input(df: pd.DataFrame) -> Iterable:\n", + " \"\"\"Adapt input for the vectorizers.\n", + "\n", + " The problem is that vectorizers accept iterable, not DataFrame, but Series. Thus, we need to ravel dataframe with text have input single dimension.\n", + " Issue reference: https://stackoverflow.com/questions/50665240/valueerror-found-input-variables-with-inconsistent-numbers-of-samples-1-3185\"\"\"\n", + "\n", + " df = df.iloc[:, 0]\n", + " return df\n", + "\n", + "\n", + "text_preprocessor = FunctionTransformer(preprocess_text)\n", + "vectorizer_input_adapter = FunctionTransformer(adapt_vectorizer_input)" + ] + }, + { + "cell_type": "markdown", + "id": "59100c101bc2ae30", + "metadata": { + "collapsed": false + }, + "source": [ + "### Build estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab98591d4cec24e3", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pipeline = Pipeline(steps=[\n", + " (\"text_preprocessor\", text_preprocessor),\n", + " (\"vectorizer_input_adapter\", vectorizer_input_adapter),\n", + " (\"vectorizer\", CountVectorizer(ngram_range=(1, 1))),\n", + " (\"estimator\", RandomForestClassifier(random_state=RANDOM_SEED))\n", + "])\n", + "\n", + "pipeline.fit(X_train, y_train)\n", + "y_pred = pipeline.predict(X_test)\n", + "\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "6447dbf318e87723", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap model with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "faf54fbaf32cca34", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "wrapped_model = Model(\n", + " model=pipeline.predict_proba, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"medical_transcript_classification\", # Optional.\n", + " classification_labels=pipeline.classes_, # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=[TEXT_COLUMN_NAME] # Default: all columns of your dataset.\n", + ")\n", + "\n", + "# Validate wrapped model and data.\n", + "print(classification_report(y_test, pipeline.classes_[wrapped_model.predict(wrapped_data).raw_prediction]))" + ] + }, + { + "cell_type": "markdown", + "id": "2a88e74a9f682208", + "metadata": { + "collapsed": false + }, + "source": [ + "## Scan your model to find vulnerabilities\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. 
For detailed information about the scan feature, please refer to our scan documentation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67f5f61eb78b32be", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "results = giskard.scan(wrapped_model, wrapped_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "eb4a2acdff290603", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:06:44.568514Z", + "start_time": "2023-08-21T14:06:43.148630Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ] + }, + { + "cell_type": "markdown", + "id": "fbc07e90f1d830b9", + "metadata": { + "collapsed": false + }, + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e740cee558970a9c", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "id": "8a3ad35fd5e99884", + "metadata": { + "collapsed": false + }, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85546fcaa8635478", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" + ] + }, + { + "cell_type": "markdown", + "id": "79d44e8a70afe38b", + "metadata": { + "collapsed": false + }, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "* Compare models to decide which model to promote\n", + "* Debug your tests to diagnose the issues\n", + "* Create more domain-specific tests that are integrating business feedback\n", + "* Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2da07a815b6bcb09", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "id": "72b0d07193f8ada", + "metadata": { + "collapsed": false + }, + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "id": "fbc07e90f1d830b9", - "metadata": { - "collapsed": false - }, - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e740cee558970a9c", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "id": "8a3ad35fd5e99884", - "metadata": { - "collapsed": false - }, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "85546fcaa8635478", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ] - }, - { - "cell_type": "markdown", - "id": "79d44e8a70afe38b", - "metadata": { - "collapsed": false - }, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2da07a815b6bcb09", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "id": "72b0d07193f8ada", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.12" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 1295.42731, + "end_time": "2022-09-14T15:45:13.308486", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2022-09-14T15:23:37.881176", + "version": "2.3.4" + } }, - "papermill": { - "default_parameters": {}, - "duration": 1295.42731, - "end_time": "2022-09-14T15:45:13.308486", - "environment_variables": {}, - "exception": null, - "input_path": "__notebook__.ipynb", - "output_path": "__notebook__.ipynb", - "parameters": {}, - "start_time": "2022-09-14T15:23:37.881176", - "version": "2.3.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb b/python-client/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb index fb71c7da67..d544ec53d5 100644 --- a/python-client/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb @@ -1,2404 +1,2404 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Movie Review Sentiment Classification with DISTILL-BERT [sklearn + torch preprocessing]\n", - "* Binary sentiment classification of movies' reviews. \n", - "* Reference notebook: \n", - "* Dataset: \n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a Logistic Regression, which uses Distill-BERT text embeddings to predict, whether movie review has positive or negative sentiment. 
Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-28T10:04:42.166870Z", - "start_time": "2023-08-28T10:04:41.978117Z" - } - }, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import torch\n", - "import numpy as np\n", - "import pandas as pd\n", - "import transformers as ppb\n", - "from sklearn.metrics import accuracy_score\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "from giskard import Model, Dataset, scan, testing\n", - "from giskard.client.giskard_client import GiskardClient" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Define constants" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-28T10:04:42.231117Z", - "start_time": "2023-08-28T10:04:41.992509Z" + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Movie Review Sentiment Classification with DISTILL-BERT [sklearn + torch preprocessing]\n", + "* Binary sentiment classification of movies' reviews. \n", + "* Reference notebook: \n", + "* Dataset: \n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a Logistic Regression, which uses Distill-BERT text embeddings to predict, whether movie review has positive or negative sentiment. 
Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# Constants.\n", - "TARGET_COLUMN = \"label\"\n", - "TEXT_COLUMN = \"text\"\n", - "\n", - "PRETRAINED_WEIGHTS_NAME = \"distilbert-base-uncased\"\n", - "\n", - "RANDOM_STATE = 0\n", - "\n", - "# Paths.\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/movie_review_sentiment_classification_dataset/train.jsonl\"\n", - "DATA_PATH = Path.home() / \".giskard\" / \"movie_review_sentiment_classification_dataset\" / \"train.jsonl\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Load data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def fetch_from_ftp(url: str, file: Path) -> None:\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " print(f\"Downloading data from {url}\")\n", - " urlretrieve(url, file)\n", - "\n", - " print(f\"Data was loaded!\")\n", - " \n", - "\n", - "def load_data(**kwargs) -> pd.DataFrame:\n", - " \"\"\"Load data.\"\"\"\n", - " fetch_from_ftp(DATA_URL, DATA_PATH)\n", - " \n", - " df = pd.read_json(DATA_PATH, lines=True, **kwargs)\n", - " df = df.drop(columns=\"label_text\")\n", - " \n", - " return df\n", - "\n", - "\n", - "reviews_df = load_data(nrows=2000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Train-Test split" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-28T10:04:42.295253Z", - "start_time": "2023-08-28T10:04:42.025756Z" + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import libraries" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "train_df, test_df = train_test_split(reviews_df, random_state=RANDOM_STATE)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap dataset with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-28T10:04:42.301825Z", - "start_time": "2023-08-28T10:04:42.047126Z" + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-28T10:04:42.166870Z", + "start_time": "2023-08-28T10:04:41.978117Z" + } + }, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import torch\n", + "import numpy as np\n", + "import pandas as pd\n", + "import transformers as ppb\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "from giskard import Model, Dataset, 
scan, testing\n", + "from giskard.client.giskard_client import GiskardClient" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "wrapped_data = Dataset(\n", - " df=test_df, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=TARGET_COLUMN, # Ground truth variable.\n", - " name=\"Movie reviews dataset\" # Optional.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Define preprocessing steps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "embedder = ppb.DistilBertModel.from_pretrained(PRETRAINED_WEIGHTS_NAME)\n", - "tokenizer = ppb.DistilBertTokenizer.from_pretrained(PRETRAINED_WEIGHTS_NAME)\n", - "\n", - "\n", - "def get_max_sequence_length(corpus: pd.Series) -> int:\n", - " \"\"\"Define a length of the longest tokenized document.\"\"\"\n", - " max_length = max(len(tokenizer.encode(document, add_special_tokens=True)) for document in corpus)\n", - " return max_length\n", - "\n", - "\n", - "max_sequence_length = get_max_sequence_length(reviews_df[TEXT_COLUMN])\n", - "\n", - "\n", - "def tokenize_documents(corpus: pd.Series) -> torch.Tensor:\n", - " \"\"\"Tokenization step.\"\"\"\n", - " tokens_matrix = corpus.apply(lambda document: tokenizer.encode(document, add_special_tokens=True)).values\n", - " tokens_matrix = torch.tensor([tokens_row + [0] * (max_sequence_length - len(tokens_row)) for tokens_row in tokens_matrix])\n", - " return tokens_matrix\n", - "\n", - "\n", - "def get_documents_embeddings(tokens_matrix: torch.Tensor) -> np.ndarray:\n", - " \"\"\"Calculate sentence embeddings using distill-BERT model.\"\"\"\n", - " attention_mask = torch.where(tokens_matrix != 0, 1, 0)\n", - " \n", - " embedder.eval()\n", - " with torch.no_grad():\n", - " tokens_representations = embedder(tokens_matrix, attention_mask=attention_mask)\n", - "\n", - " # Take just 'cls token' embeddings, which represent whole sentence embedding.\n", - " documents_embeddings = tokens_representations[0][:, 0, :].numpy()\n", - " return documents_embeddings\n", - "\n", - "\n", - "def preprocess_text(df: pd.DataFrame) -> np.ndarray:\n", - " \"\"\"Preprocessing function to be also used in 'giskard.Model'.\"\"\"\n", - " return get_documents_embeddings(tokenize_documents(df[TEXT_COLUMN]))\n", - "\n", - "\n", - "X_train, Y_train = preprocess_text(train_df), train_df.label\n", - "X_test, Y_test = preprocess_text(test_df), test_df.label" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Build estimator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "classifier = LogisticRegression()\n", - "classifier.fit(X_train, Y_train)\n", - "\n", - "# Validate model.\n", - "train_score = classifier.score(X_train, Y_train)\n", - "print(f\"Train accuracy: {train_score: .2f}\")\n", - "\n", - "test_score = classifier.score(X_test, Y_test)\n", - "print(f\"Test accuracy: {test_score: .2f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap model with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def 
prediction_function(df: pd.DataFrame) -> np.ndarray:\n", - " x = preprocess_text(df)\n", - " return classifier.predict_proba(x)\n", - "\n", - "\n", - "wrapped_model = Model(\n", - " model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"Movie reviews sentiment classifier\", # Optional.\n", - " classification_labels=classifier.classes_.tolist(), # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=[TEXT_COLUMN], # Default: all columns of your dataset.\n", - " # classification_threshold=0.5 # Default: 0.5.\n", - ")\n", - "\n", - "Y_test_pred_wrapped = wrapped_model.predict(wrapped_data).prediction\n", - "wrapped_test_score = accuracy_score(Y_test, Y_test_pred_wrapped)\n", - "print(f\"Wrapped test accuracy: {wrapped_test_score: .2f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "results = scan(wrapped_model, wrapped_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-28T10:09:16.517583Z", - "start_time": "2023-08-28T10:09:16.313968Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Define constants" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-28T10:04:42.231117Z", + "start_time": "2023-08-28T10:04:41.992509Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# Constants.\n", + "TARGET_COLUMN = \"label\"\n", + "TEXT_COLUMN = \"text\"\n", + "\n", + "PRETRAINED_WEIGHTS_NAME = \"distilbert-base-uncased\"\n", + "\n", + "RANDOM_STATE = 0\n", + "\n", + "# Paths.\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/movie_review_sentiment_classification_dataset/train.jsonl\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"movie_review_sentiment_classification_dataset\" / \"train.jsonl\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Dataset preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Load data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " if not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " print(f\"Downloading data from {url}\")\n", + " urlretrieve(url, file)\n", + "\n", + " print(f\"Data was loaded!\")\n", + " \n", + "\n", + "def load_data(**kwargs) -> pd.DataFrame:\n", + " \"\"\"Load data.\"\"\"\n", + " fetch_from_ftp(DATA_URL, DATA_PATH)\n", + " \n", + " df = pd.read_json(DATA_PATH, lines=True, **kwargs)\n", + " df = df.drop(columns=\"label_text\")\n", + " \n", + " return df\n", + "\n", + "\n", + "reviews_df = load_data(nrows=2000)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Train-Test split" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-28T10:04:42.295253Z", + "start_time": "2023-08-28T10:04:42.025756Z" + }, + "collapsed": false + }, + 
"outputs": [], + "source": [ + "train_df, test_df = train_test_split(reviews_df, random_state=RANDOM_STATE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap dataset with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-28T10:04:42.301825Z", + "start_time": "2023-08-28T10:04:42.047126Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "wrapped_data = Dataset(\n", + " df=test_df, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=TARGET_COLUMN, # Ground truth variable.\n", + " name=\"Movie reviews dataset\" # Optional.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Define preprocessing steps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "embedder = ppb.DistilBertModel.from_pretrained(PRETRAINED_WEIGHTS_NAME)\n", + "tokenizer = ppb.DistilBertTokenizer.from_pretrained(PRETRAINED_WEIGHTS_NAME)\n", + "\n", + "\n", + "def get_max_sequence_length(corpus: pd.Series) -> int:\n", + " \"\"\"Define a length of the longest tokenized document.\"\"\"\n", + " max_length = max(len(tokenizer.encode(document, add_special_tokens=True)) for document in corpus)\n", + " return max_length\n", + "\n", + "\n", + "max_sequence_length = get_max_sequence_length(reviews_df[TEXT_COLUMN])\n", + "\n", + "\n", + "def tokenize_documents(corpus: pd.Series) -> torch.Tensor:\n", + " \"\"\"Tokenization step.\"\"\"\n", + " tokens_matrix = corpus.apply(lambda document: tokenizer.encode(document, add_special_tokens=True)).values\n", + " tokens_matrix = torch.tensor([tokens_row + [0] * (max_sequence_length - len(tokens_row)) for tokens_row in tokens_matrix])\n", + " return tokens_matrix\n", + "\n", + "\n", + "def get_documents_embeddings(tokens_matrix: torch.Tensor) -> np.ndarray:\n", + " \"\"\"Calculate sentence embeddings using distill-BERT model.\"\"\"\n", + " attention_mask = torch.where(tokens_matrix != 0, 1, 0)\n", + " \n", + " embedder.eval()\n", + " with torch.no_grad():\n", + " tokens_representations = embedder(tokens_matrix, attention_mask=attention_mask)\n", + "\n", + " # Take just 'cls token' embeddings, which represent whole sentence embedding.\n", + " documents_embeddings = tokens_representations[0][:, 0, :].numpy()\n", + " return documents_embeddings\n", + "\n", + "\n", + "def preprocess_text(df: pd.DataFrame) -> np.ndarray:\n", + " \"\"\"Preprocessing function to be also used in 'giskard.Model'.\"\"\"\n", + " return get_documents_embeddings(tokenize_documents(df[TEXT_COLUMN]))\n", + "\n", + "\n", + "X_train, Y_train = preprocess_text(train_df), train_df.label\n", + "X_test, Y_test = preprocess_text(test_df), test_df.label" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "classifier = LogisticRegression()\n", + "classifier.fit(X_train, Y_train)\n", + "\n", + "# Validate model.\n", + "train_score = classifier.score(X_train, Y_train)\n", + "print(f\"Train accuracy: {train_score: .2f}\")\n", + "\n", + "test_score = classifier.score(X_test, 
Y_test)\n", + "print(f\"Test accuracy: {test_score: .2f}\")" + ] }, - "collapsed": false - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap model with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def prediction_function(df: pd.DataFrame) -> np.ndarray:\n", + " x = preprocess_text(df)\n", + " return classifier.predict_proba(x)\n", + "\n", + "\n", + "wrapped_model = Model(\n", + " model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"Movie reviews sentiment classifier\", # Optional.\n", + " classification_labels=classifier.classes_.tolist(), # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=[TEXT_COLUMN], # Default: all columns of your dataset.\n", + " # classification_threshold=0.5 # Default: 0.5.\n", + ")\n", + "\n", + "Y_test_pred_wrapped = wrapped_model.predict(wrapped_data).prediction\n", + "wrapped_test_score = accuracy_score(Y_test, Y_test_pred_wrapped)\n", + "print(f\"Wrapped test accuracy: {wrapped_test_score: .2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "results = scan(wrapped_model, wrapped_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-28T10:09:16.517583Z", + "start_time": "2023-08-28T10:09:16.313968Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "* Compare models to decide which model to promote\n", + "* Debug your tests to diagnose the issues\n", + "* Create more domain-specific tests that are integrating business feedback\n", + "* Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": { - "015617b377f24bfb947341ef6021d751": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b65b15ed6193433d93b240937b72c9cb", - "placeholder": "​", - "style": "IPY_MODEL_40e15ff7d84f4000b33060dc8f2fda2c", - "value": " 268M/268M [00:09<00:00, 29.6MB/s]" - } - }, - "26a0a540385d4f64b016994b200bde56": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3622111d5ab944fc8faf64a96da4504a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - 
"right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "40022bfab7ab4eb08bceb140bac98d4d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "40e15ff7d84f4000b33060dc8f2fda2c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "57398ca7fa59487d81d65f98461949fb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "57f7eea4b37e477b9d187306c7bd45d5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "IntProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "IntProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "Downloading: 100%", - "description_tooltip": null, - "layout": "IPY_MODEL_3622111d5ab944fc8faf64a96da4504a", - "max": 546, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b0a475e2fa3d454c89a32111eed0236b", - "value": 546 - } - }, - "58bdc5a295ea4bb1a84ddc23513bc249": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f19291fa9e3b4fbcbb7c4c2b5b48ee27", - "placeholder": "​", - "style": "IPY_MODEL_dda0eb368a604f81921afb1f31a92211", - "value": " 232k/232k [00:00<00:00, 676kB/s]" - } - }, - "59a016b7e79d4409929f374bee4495dd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": 
null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5de320d9a6884f23b9a6ee511100803d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7f73305ddfaa4965888d7ac9769278b5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_57f7eea4b37e477b9d187306c7bd45d5", - "IPY_MODEL_b54c4094d5034b5d9bd42cb777a82004" - ], - "layout": "IPY_MODEL_928b17f3e9c9495fa0791a4b2ca887f2" - } - }, - "8f4f7ef76f1b40c5b517c6b245ff2c55": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - 
"object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "928b17f3e9c9495fa0791a4b2ca887f2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "953ec34fae19457893fe5c11ff97ee58": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b0a475e2fa3d454c89a32111eed0236b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } - }, - "b54c4094d5034b5d9bd42cb777a82004": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_59a016b7e79d4409929f374bee4495dd", - "placeholder": "​", - "style": "IPY_MODEL_f0e9871cead147a98b31e074dc9827d3", - "value": " 546/546 [00:00<00:00, 662B/s]" - } - }, - "b65b15ed6193433d93b240937b72c9cb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b85f6f130c4a4cae8db4ce9239e19a14": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c8d6e3ba1b2b4bb9b5e329cec6e7e4c5", - "IPY_MODEL_015617b377f24bfb947341ef6021d751" - ], - "layout": "IPY_MODEL_5de320d9a6884f23b9a6ee511100803d" - } - }, - "c8d6e3ba1b2b4bb9b5e329cec6e7e4c5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "IntProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "IntProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "Downloading: 100%", - "description_tooltip": null, - "layout": "IPY_MODEL_8f4f7ef76f1b40c5b517c6b245ff2c55", - "max": 267967963, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_57398ca7fa59487d81d65f98461949fb", - "value": 267967963 - } - }, - "dda0eb368a604f81921afb1f31a92211": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "eee8f26976914224a09b20361eb45b14": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"IntProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "IntProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "Downloading: 100%", - "description_tooltip": null, - "layout": "IPY_MODEL_26a0a540385d4f64b016994b200bde56", - "max": 231508, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_40022bfab7ab4eb08bceb140bac98d4d", - "value": 231508 - } - }, - "f0e9871cead147a98b31e074dc9827d3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "f19291fa9e3b4fbcbb7c4c2b5b48ee27": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fa4579b2ada84c6985d088c6bca7f72f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_eee8f26976914224a09b20361eb45b14", - "IPY_MODEL_58bdc5a295ea4bb1a84ddc23513bc249" - ], - "layout": "IPY_MODEL_953ec34fae19457893fe5c11ff97ee58" - } - } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" }, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "015617b377f24bfb947341ef6021d751": { + 
"model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b65b15ed6193433d93b240937b72c9cb", + "placeholder": "​", + "style": "IPY_MODEL_40e15ff7d84f4000b33060dc8f2fda2c", + "value": " 268M/268M [00:09<00:00, 29.6MB/s]" + } + }, + "26a0a540385d4f64b016994b200bde56": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3622111d5ab944fc8faf64a96da4504a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "40022bfab7ab4eb08bceb140bac98d4d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + 
"bar_color": null, + "description_width": "initial" + } + }, + "40e15ff7d84f4000b33060dc8f2fda2c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "57398ca7fa59487d81d65f98461949fb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "initial" + } + }, + "57f7eea4b37e477b9d187306c7bd45d5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "IntProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "IntProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "Downloading: 100%", + "description_tooltip": null, + "layout": "IPY_MODEL_3622111d5ab944fc8faf64a96da4504a", + "max": 546, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b0a475e2fa3d454c89a32111eed0236b", + "value": 546 + } + }, + "58bdc5a295ea4bb1a84ddc23513bc249": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f19291fa9e3b4fbcbb7c4c2b5b48ee27", + "placeholder": "​", + "style": "IPY_MODEL_dda0eb368a604f81921afb1f31a92211", + "value": " 232k/232k [00:00<00:00, 676kB/s]" + } + }, + "59a016b7e79d4409929f374bee4495dd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5de320d9a6884f23b9a6ee511100803d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7f73305ddfaa4965888d7ac9769278b5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_57f7eea4b37e477b9d187306c7bd45d5", + "IPY_MODEL_b54c4094d5034b5d9bd42cb777a82004" + ], + "layout": "IPY_MODEL_928b17f3e9c9495fa0791a4b2ca887f2" + } + }, + "8f4f7ef76f1b40c5b517c6b245ff2c55": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "928b17f3e9c9495fa0791a4b2ca887f2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "953ec34fae19457893fe5c11ff97ee58": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b0a475e2fa3d454c89a32111eed0236b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "initial" + } + }, + "b54c4094d5034b5d9bd42cb777a82004": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_59a016b7e79d4409929f374bee4495dd", + "placeholder": "​", + "style": "IPY_MODEL_f0e9871cead147a98b31e074dc9827d3", + "value": " 546/546 [00:00<00:00, 662B/s]" + } + }, + "b65b15ed6193433d93b240937b72c9cb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b85f6f130c4a4cae8db4ce9239e19a14": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c8d6e3ba1b2b4bb9b5e329cec6e7e4c5", + "IPY_MODEL_015617b377f24bfb947341ef6021d751" + ], + "layout": "IPY_MODEL_5de320d9a6884f23b9a6ee511100803d" + } + }, + "c8d6e3ba1b2b4bb9b5e329cec6e7e4c5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "IntProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "IntProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "Downloading: 100%", + "description_tooltip": null, + "layout": "IPY_MODEL_8f4f7ef76f1b40c5b517c6b245ff2c55", + "max": 267967963, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_57398ca7fa59487d81d65f98461949fb", + "value": 267967963 + } + }, + "dda0eb368a604f81921afb1f31a92211": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "eee8f26976914224a09b20361eb45b14": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "IntProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "IntProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "Downloading: 100%", + "description_tooltip": null, + "layout": "IPY_MODEL_26a0a540385d4f64b016994b200bde56", + "max": 231508, + "min": 0, + 
"orientation": "horizontal", + "style": "IPY_MODEL_40022bfab7ab4eb08bceb140bac98d4d", + "value": 231508 + } + }, + "f0e9871cead147a98b31e074dc9827d3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f19291fa9e3b4fbcbb7c4c2b5b48ee27": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fa4579b2ada84c6985d088c6bca7f72f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_eee8f26976914224a09b20361eb45b14", + "IPY_MODEL_58bdc5a295ea4bb1a84ddc23513bc249" + ], + "layout": "IPY_MODEL_953ec34fae19457893fe5c11ff97ee58" + } + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/newspaper_classification_pytorch.ipynb b/python-client/docs/reference/notebooks/newspaper_classification_pytorch.ipynb index 6bd7fafb24..e313d1f4d1 100644 --- a/python-client/docs/reference/notebooks/newspaper_classification_pytorch.ipynb +++ b/python-client/docs/reference/notebooks/newspaper_classification_pytorch.ipynb @@ -1,605 +1,605 @@ { - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Newspaper classification [PyTorch]\n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a custom Neural Network, which predicts newspaper category (sports, news, business or sci-tech), based on its contents. 
Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - " * Compare models to decide which one to promote\n", - " * Debug your tests to diagnose issues\n", - " * Share your results and collect business feedback from your team" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Install Giskard" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 1, - "outputs": [], - "source": [ - "!pip install \"giskard>=2.0.0b\" -U" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T12:41:11.380265Z", - "start_time": "2023-08-22T12:41:11.302704Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Import libraries" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "eup4gpgVoA10", - "ExecuteTime": { - "end_time": "2023-08-22T12:41:36.268665Z", - "start_time": "2023-08-22T12:41:13.622046Z" - } - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "import torch\n", - "import numpy as np\n", - "import pandas as pd\n", - "from torch import nn\n", - "from torchtext.datasets import AG_NEWS\n", - "from torch.utils.data import DataLoader\n", - "from sklearn.metrics import accuracy_score\n", - "from torchtext.data.utils import get_tokenizer\n", - "from torch.utils.data.dataset import random_split\n", - "from torchtext.vocab import build_vocab_from_iterator\n", - "from torchtext.data.functional import to_map_style_dataset\n", - "\n", - "from giskard import Model, Dataset, GiskardClient, scan, testing" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Define constants" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 3, - "outputs": [], - "source": [ - "DEVICE = torch.device(\"cpu\")\n", - "\n", - "TARGET_MAP = {0: \"World\", 1: \"Sports\", 2: \"Business\", 3: \"Sci/Tech\"}\n", - "TARGET_COLUMN_NAME = \"label\"\n", - "FEATURE_COLUMN_NAME = \"text\"\n", - "\n", - "LOADERS_BATCH_SIZE = 64" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T12:41:36.321238Z", - "start_time": "2023-08-22T12:41:36.276379Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Dataset preparation" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "### Load data" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 4, - "outputs": [], - "source": [ - "train_data, test_data = AG_NEWS()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T12:41:37.446562Z", - "start_time": "2023-08-22T12:41:36.300328Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Wrap dataset with Giskard" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 20, - "outputs": [], - "source": [ - "raw_data = pd.DataFrame({TARGET_COLUMN_NAME: TARGET_MAP[label_id - 1], FEATURE_COLUMN_NAME: text} \n", - " for label_id, text in test_data)\n", - "wrapped_data = Dataset(\n", - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing 
steps) and the actual ground truth variable\n", - " name=\"Test Dataset\", # Ground truth variable\n", - " target=\"label\", # Optional\n", - ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T13:59:47.323179Z", - "start_time": "2023-08-22T13:59:45.958410Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Prepare dataloaders for training and evaluation" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-22T14:00:13.657270Z", - "start_time": "2023-08-22T13:59:59.584390Z" - } - }, - "outputs": [], - "source": [ - "# Simple English tokenizer provided by torchtext.\n", - "tokenizer = get_tokenizer(\"basic_english\")\n", - "\n", - "# Build a vocabulary from all the tokens we can find in the train data.\n", - "vocab = build_vocab_from_iterator((tokenizer(text) for _, text in train_data), specials=[\"\"])\n", - "vocab.set_default_index(vocab[\"\"])\n", - "\n", - "\n", - "def preprocess_text(raw_text):\n", - " return vocab(tokenizer(raw_text))\n", - "\n", - "\n", - "def preprocess_label(raw_label):\n", - " return int(raw_label) - 1\n", - "\n", - "\n", - "def collate_fn(batch):\n", - " label_list, text_list, offsets = [], [], [0]\n", - "\n", - " for _label, _text in batch:\n", - " label_list.append(preprocess_label(_label))\n", - " processed_text = torch.tensor(preprocess_text(_text), dtype=torch.int64)\n", - " text_list.append(processed_text)\n", - " offsets.append(processed_text.size(0))\n", - "\n", - " label_list = torch.tensor(label_list, dtype=torch.int64)\n", - " offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)\n", - " text_list = torch.cat(text_list)\n", - "\n", - " return label_list.to(DEVICE), text_list.to(DEVICE), offsets.to(DEVICE)\n", - "\n", - "\n", - "# Create the datasets\n", - "train_dataset = to_map_style_dataset(train_data)\n", - "test_dataset = to_map_style_dataset(test_data)\n", - "\n", - "# We further divide the training data into a train and validation split.\n", - "train_split, valid_split = random_split(train_dataset, [0.95, 0.05])\n", - "\n", - "# Prepare the data loaders\n", - "train_dataloader = DataLoader(train_split, batch_size=LOADERS_BATCH_SIZE, shuffle=True, collate_fn=collate_fn)\n", - "valid_dataloader = DataLoader(valid_split, batch_size=LOADERS_BATCH_SIZE, shuffle=True, collate_fn=collate_fn)\n", - "test_dataloader = DataLoader(test_dataset, batch_size=LOADERS_BATCH_SIZE, shuffle=True, collate_fn=collate_fn)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "zgbK39d6oA14" - }, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Define model" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 22, - "outputs": [], - "source": [ - "class TextClassificationModel(nn.Module):\n", - " def __init__(self, vocab_size, embed_dim, num_class):\n", - " super(TextClassificationModel, self).__init__()\n", - " self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=False)\n", - " self.fc = nn.Linear(embed_dim, num_class)\n", - " self.init_weights()\n", - "\n", - " def init_weights(self):\n", - " init_range = 0.5\n", - " self.embedding.weight.data.uniform_(-init_range, init_range)\n", - " self.fc.weight.data.uniform_(-init_range, init_range)\n", - " self.fc.bias.data.zero_()\n", - "\n", - " def forward(self, text, offsets):\n", - " embedded = self.embedding(text, offsets)\n", - " 
return self.fc(embedded).softmax(axis=-1)\n", - "\n", - "\n", - "model = TextClassificationModel(vocab_size=len(vocab), embed_dim=64, num_class=4).to(DEVICE)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T14:01:16.715230Z", - "start_time": "2023-08-22T14:01:16.579679Z" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "### Train and evaluate model" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tnYAEF9OoA14" - }, - "outputs": [], - "source": [ - "criterion = torch.nn.CrossEntropyLoss()\n", - "optimizer = torch.optim.SGD(model.parameters(), lr=5)\n", - "scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.1)\n", - "\n", - "\n", - "def train_epoch(dataloader):\n", - " model.train()\n", - "\n", - " train_accuracy = total_count = 0\n", - " for label, text, offset in dataloader:\n", - " optimizer.zero_grad()\n", - " predicted_label = model(text, offset)\n", - " loss = criterion(predicted_label, label)\n", - " loss.backward()\n", - " torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)\n", - " optimizer.step()\n", - " train_accuracy += (predicted_label.argmax(1) == label).sum().item()\n", - " total_count += label.size(0)\n", - "\n", - " return train_accuracy / total_count\n", - "\n", - "\n", - "def validation_epoch(dataloader):\n", - " model.eval()\n", - "\n", - " validation_accuracy = total_count = 0\n", - " with torch.no_grad():\n", - " for label, text, offsets in dataloader:\n", - " predicted_label = model(text, offsets)\n", - " validation_accuracy += (predicted_label.argmax(1) == label).sum().item()\n", - " total_count += label.size(0)\n", - "\n", - " return validation_accuracy / total_count\n", - "\n", - "\n", - "total_accuracy = None\n", - "for epoch in range(1, 3):\n", - " start_time = time.perf_counter()\n", - "\n", - " train_epoch(train_dataloader)\n", - " accu_val = validation_epoch(valid_dataloader)\n", - "\n", - " if total_accuracy is not None and total_accuracy > accu_val:\n", - " scheduler.step()\n", - " else:\n", - " total_accuracy = accu_val\n", - "\n", - " print(\"-\" * 65)\n", - " print(f\"| end of epoch {epoch: .3f} | time: {time.perf_counter() - start_time :5.2f}s | valid accuracy {accu_val:8.3f} \")\n", - " print(\"-\" * 65)\n", - "\n", - "\n", - "test_accuracy = validation_epoch(test_dataloader)\n", - "print('Test accuracy {:8.3f}'.format(test_accuracy))" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Wrap model with Giskard" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "def infer_predictions(_model: torch.nn.Module, _dataloader: DataLoader) -> np.ndarray:\n", - " _model.eval()\n", - " pred = list()\n", - "\n", - " for _, text, offsets in _dataloader:\n", - " with torch.no_grad():\n", - " probs = model(text, offsets).cpu().detach().numpy()\n", - "\n", - " pred.append(probs)\n", - "\n", - " pred = np.concatenate(pred, axis=0)\n", - " return pred\n", - "\n", - "\n", - "def prediction_function(df) -> np.ndarray:\n", - " # Placeholder for label.\n", - " if df.shape[1] == 1:\n", - " df.insert(0, TARGET_COLUMN_NAME, np.zeros(len(df)))\n", - "\n", - " data_iterator = df.itertuples(index=False)\n", - " dataloader = DataLoader(to_map_style_dataset(data_iterator), batch_size=LOADERS_BATCH_SIZE, collate_fn=collate_fn)\n", - " predictions = infer_predictions(model, dataloader)\n", - " predictions = predictions\n", - "\n", - " return 
predictions\n", - "\n", - "\n", - "wrapped_model = Model(\n", - " model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"Simple News Classification Model\", # Optional.\n", - " classification_labels=list(TARGET_MAP.values()), # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=[\"text\"], # Default: all columns of your dataset.\n", - ")\n", - "\n", - "# Validate wrapped model.\n", - "wrapped_test_metric = accuracy_score(wrapped_data.df[TARGET_COLUMN_NAME], wrapped_model.predict(wrapped_data).prediction)\n", - "print(f\"Wrapped Test accuracy: {wrapped_test_metric:.3f}\")" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Scan model with Giskard\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "results = scan(wrapped_model, wrapped_data)" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 12, - "outputs": [ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Newspaper classification [PyTorch]\n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a custom Neural Network, which predicts newspaper category (sports, news, business or sci-tech), based on its contents. 
Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + " * Compare models to decide which one to promote\n", + " * Debug your tests to diagnose issues\n", + " * Share your results and collect business feedback from your team" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Install Giskard" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 1, + "outputs": [], + "source": [ + "!pip install \"giskard>=2.0.0b\" -U" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T12:41:11.380265Z", + "start_time": "2023-08-22T12:41:11.302704Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Import libraries" + ], + "metadata": { + "collapsed": false + } + }, { - "data": { - "text/html": "\n" - }, - "metadata": {}, - "output_type": "display_data" + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "eup4gpgVoA10", + "ExecuteTime": { + "end_time": "2023-08-22T12:41:36.268665Z", + "start_time": "2023-08-22T12:41:13.622046Z" + } + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "import torch\n", + "import numpy as np\n", + "import pandas as pd\n", + "from torch import nn\n", + "from torchtext.datasets import AG_NEWS\n", + "from torch.utils.data import DataLoader\n", + "from sklearn.metrics import accuracy_score\n", + "from torchtext.data.utils import get_tokenizer\n", + "from torch.utils.data.dataset import random_split\n", + "from torchtext.vocab import build_vocab_from_iterator\n", + "from torchtext.data.functional import to_map_style_dataset\n", + "\n", + "from giskard import Model, Dataset, GiskardClient, scan, testing" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Define constants" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "DEVICE = torch.device(\"cpu\")\n", + "\n", + "TARGET_MAP = {0: \"World\", 1: \"Sports\", 2: \"Business\", 3: \"Sci/Tech\"}\n", + "TARGET_COLUMN_NAME = \"label\"\n", + "FEATURE_COLUMN_NAME = \"text\"\n", + "\n", + "LOADERS_BATCH_SIZE = 64" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T12:41:36.321238Z", + "start_time": "2023-08-22T12:41:36.276379Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Dataset preparation" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "### Load data" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "train_data, test_data = AG_NEWS()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T12:41:37.446562Z", + "start_time": "2023-08-22T12:41:36.300328Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Wrap dataset with Giskard" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [], + "source": [ + "raw_data = pd.DataFrame({TARGET_COLUMN_NAME: TARGET_MAP[label_id - 1], FEATURE_COLUMN_NAME: text} \n", + " for label_id, text in test_data)\n", + "wrapped_data = Dataset(\n", + " 
df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable\n", + " name=\"Test Dataset\", # Ground truth variable\n", + " target=\"label\", # Optional\n", + ")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T13:59:47.323179Z", + "start_time": "2023-08-22T13:59:45.958410Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Prepare dataloaders for training and evaluation" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T14:00:13.657270Z", + "start_time": "2023-08-22T13:59:59.584390Z" + } + }, + "outputs": [], + "source": [ + "# Simple English tokenizer provided by torchtext.\n", + "tokenizer = get_tokenizer(\"basic_english\")\n", + "\n", + "# Build a vocabulary from all the tokens we can find in the train data.\n", + "vocab = build_vocab_from_iterator((tokenizer(text) for _, text in train_data), specials=[\"\"])\n", + "vocab.set_default_index(vocab[\"\"])\n", + "\n", + "\n", + "def preprocess_text(raw_text):\n", + " return vocab(tokenizer(raw_text))\n", + "\n", + "\n", + "def preprocess_label(raw_label):\n", + " return int(raw_label) - 1\n", + "\n", + "\n", + "def collate_fn(batch):\n", + " label_list, text_list, offsets = [], [], [0]\n", + "\n", + " for _label, _text in batch:\n", + " label_list.append(preprocess_label(_label))\n", + " processed_text = torch.tensor(preprocess_text(_text), dtype=torch.int64)\n", + " text_list.append(processed_text)\n", + " offsets.append(processed_text.size(0))\n", + "\n", + " label_list = torch.tensor(label_list, dtype=torch.int64)\n", + " offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)\n", + " text_list = torch.cat(text_list)\n", + "\n", + " return label_list.to(DEVICE), text_list.to(DEVICE), offsets.to(DEVICE)\n", + "\n", + "\n", + "# Create the datasets\n", + "train_dataset = to_map_style_dataset(train_data)\n", + "test_dataset = to_map_style_dataset(test_data)\n", + "\n", + "# We further divide the training data into a train and validation split.\n", + "train_split, valid_split = random_split(train_dataset, [0.95, 0.05])\n", + "\n", + "# Prepare the data loaders\n", + "train_dataloader = DataLoader(train_split, batch_size=LOADERS_BATCH_SIZE, shuffle=True, collate_fn=collate_fn)\n", + "valid_dataloader = DataLoader(valid_split, batch_size=LOADERS_BATCH_SIZE, shuffle=True, collate_fn=collate_fn)\n", + "test_dataloader = DataLoader(test_dataset, batch_size=LOADERS_BATCH_SIZE, shuffle=True, collate_fn=collate_fn)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "zgbK39d6oA14" + }, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Define model" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [], + "source": [ + "class TextClassificationModel(nn.Module):\n", + " def __init__(self, vocab_size, embed_dim, num_class):\n", + " super(TextClassificationModel, self).__init__()\n", + " self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=False)\n", + " self.fc = nn.Linear(embed_dim, num_class)\n", + " self.init_weights()\n", + "\n", + " def init_weights(self):\n", + " init_range = 0.5\n", + " self.embedding.weight.data.uniform_(-init_range, init_range)\n", + " self.fc.weight.data.uniform_(-init_range, init_range)\n", + " self.fc.bias.data.zero_()\n", + "\n", + " 
def forward(self, text, offsets):\n", + " embedded = self.embedding(text, offsets)\n", + " return self.fc(embedded).softmax(axis=-1)\n", + "\n", + "\n", + "model = TextClassificationModel(vocab_size=len(vocab), embed_dim=64, num_class=4).to(DEVICE)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T14:01:16.715230Z", + "start_time": "2023-08-22T14:01:16.579679Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Train and evaluate model" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tnYAEF9OoA14" + }, + "outputs": [], + "source": [ + "criterion = torch.nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=5)\n", + "scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.1)\n", + "\n", + "\n", + "def train_epoch(dataloader):\n", + " model.train()\n", + "\n", + " train_accuracy = total_count = 0\n", + " for label, text, offset in dataloader:\n", + " optimizer.zero_grad()\n", + " predicted_label = model(text, offset)\n", + " loss = criterion(predicted_label, label)\n", + " loss.backward()\n", + " torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)\n", + " optimizer.step()\n", + " train_accuracy += (predicted_label.argmax(1) == label).sum().item()\n", + " total_count += label.size(0)\n", + "\n", + " return train_accuracy / total_count\n", + "\n", + "\n", + "def validation_epoch(dataloader):\n", + " model.eval()\n", + "\n", + " validation_accuracy = total_count = 0\n", + " with torch.no_grad():\n", + " for label, text, offsets in dataloader:\n", + " predicted_label = model(text, offsets)\n", + " validation_accuracy += (predicted_label.argmax(1) == label).sum().item()\n", + " total_count += label.size(0)\n", + "\n", + " return validation_accuracy / total_count\n", + "\n", + "\n", + "total_accuracy = None\n", + "for epoch in range(1, 3):\n", + " start_time = time.perf_counter()\n", + "\n", + " train_epoch(train_dataloader)\n", + " accu_val = validation_epoch(valid_dataloader)\n", + "\n", + " if total_accuracy is not None and total_accuracy > accu_val:\n", + " scheduler.step()\n", + " else:\n", + " total_accuracy = accu_val\n", + "\n", + " print(\"-\" * 65)\n", + " print(f\"| end of epoch {epoch: .3f} | time: {time.perf_counter() - start_time :5.2f}s | valid accuracy {accu_val:8.3f} \")\n", + " print(\"-\" * 65)\n", + "\n", + "\n", + "test_accuracy = validation_epoch(test_dataloader)\n", + "print('Test accuracy {:8.3f}'.format(test_accuracy))" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Wrap model with Giskard" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "def infer_predictions(_model: torch.nn.Module, _dataloader: DataLoader) -> np.ndarray:\n", + " _model.eval()\n", + " pred = list()\n", + "\n", + " for _, text, offsets in _dataloader:\n", + " with torch.no_grad():\n", + " probs = model(text, offsets).cpu().detach().numpy()\n", + "\n", + " pred.append(probs)\n", + "\n", + " pred = np.concatenate(pred, axis=0)\n", + " return pred\n", + "\n", + "\n", + "def prediction_function(df) -> np.ndarray:\n", + " # Placeholder for label.\n", + " if df.shape[1] == 1:\n", + " df.insert(0, TARGET_COLUMN_NAME, np.zeros(len(df)))\n", + "\n", + " data_iterator = df.itertuples(index=False)\n", + " dataloader = DataLoader(to_map_style_dataset(data_iterator), batch_size=LOADERS_BATCH_SIZE, collate_fn=collate_fn)\n", + " predictions = 
infer_predictions(model, dataloader)\n", + " predictions = predictions\n", + "\n", + " return predictions\n", + "\n", + "\n", + "wrapped_model = Model(\n", + " model=prediction_function, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"Simple News Classification Model\", # Optional.\n", + " classification_labels=list(TARGET_MAP.values()), # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=[\"text\"], # Default: all columns of your dataset.\n", + ")\n", + "\n", + "# Validate wrapped model.\n", + "wrapped_test_metric = accuracy_score(wrapped_data.df[TARGET_COLUMN_NAME], wrapped_model.predict(wrapped_data).prediction)\n", + "print(f\"Wrapped Test accuracy: {wrapped_test_metric:.3f}\")" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Scan model with Giskard\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "results = scan(wrapped_model, wrapped_data)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [ + { + "data": { + "text/html": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-22T13:19:45.209851Z", + "start_time": "2023-08-22T13:19:44.778914Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
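+    "\n",
+    "Slicing functions from the catalog (or custom ones) can be combined with these tests to check performance on a specific subset of the data. A minimal sketch is shown below; it assumes the `slicing_function` decorator and the `slicing_function` argument of `test_f1` are available in your Giskard version, and the 30-word cut-off and the 0.7 threshold are arbitrary choices:\n",
+    "\n",
+    "```python\n",
+    "import pandas as pd\n",
+    "from giskard import slicing_function\n",
+    "\n",
+    "@slicing_function(name=\"Short articles\")\n",
+    "def short_articles(row: pd.Series) -> bool:\n",
+    "    # Keep only rows whose article text has fewer than 30 words.\n",
+    "    return len(row[\"text\"].split()) < 30\n",
+    "\n",
+    "# Check that the F1 score stays above 0.7 on short articles only.\n",
+    "test_suite.add_test(\n",
+    "    testing.test_f1(model=wrapped_model, dataset=wrapped_data,\n",
+    "                    slicing_function=short_articles, threshold=0.7)\n",
+    ").run()\n",
+    "```"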
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "* Compare models to decide which model to promote\n", + "* Debug your tests to diagnose the issues\n", + "* Create more domain-specific tests that are integrating business feedback\n", + "* Share your results" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" + ], + "metadata": { + "collapsed": false + } } - ], - "source": [ - "display(results)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T13:19:45.209851Z", - "start_time": "2023-08-22T13:19:44.778914Z" + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "* Compare models to decide which model to promote\n", - "* Debug your tests to diagnose the issues\n", - "* Create more domain-specific tests that are integrating business feedback\n", - "* Share your results" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ], - "metadata": { - "collapsed": false - } - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.10" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb b/python-client/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb index c8a323aa49..91c965e1e5 100644 --- a/python-client/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb +++ b/python-client/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb @@ -1,3205 +1,3205 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "990eccb8", - "metadata": {}, - "source": [ - "# Tripadvisor reviews sentiment classification [HuggingFace]\n", - "\n", - "
\n", - "What is Giskard ?\n", - "\n", - "Giskard is an open-source testing framework dedicated to ML models, ranging from tabular to LLM. [To know more about Giskard, click here](https://docs.giskard.ai/en/latest/getting-started/index.html).\n", - "
" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "40f23d1a", - "metadata": {}, - "source": [ - "By running this notebook, you'll create a whole test suite in a few lines of code. The model used here is a BERT model for text classification. It is used to predict the sentiment of tripadvisor reviews (dataset from Kaggle)\n", - "\n", - "You'll learn how to:\n", - "\n", - "- Detect vulnerabilities by scanning the model\n", - "\n", - "- Generate a test suite with domain-specific tests\n", - "\n", - "- Customize your test suite by loading a test from the Giskard catalog\n", - "\n", - "- Upload your model to the Giskard server to:\n", - "\n", - " - Compare models to decide which one to promote\n", - "\n", - " - Debug your tests to diagnose issues\n", - "\n", - " - Share your results and collect business feedback from your team\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "da9a6eac", - "metadata": {}, - "source": [ - "## Install Giskard\n", - "\n", - "To see the list of Python requirements, please refer to [the documentation](https://docs.giskard.ai/en/latest/guides/installation_library/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "904bb40c24cd2d02", - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "58613d7d", - "metadata": {}, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d960163", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "import os\n", - "import random\n", - "import re\n", - "import string\n", - "from dataclasses import dataclass\n", - "from pathlib import Path\n", - "from typing import Union, List\n", - "from urllib.request import urlretrieve\n", - "\n", - "import nltk\n", - "import numpy as np\n", - "import pandas as pd\n", - "import torch\n", - "from nltk.corpus import stopwords\n", - "from torch.utils.data import DataLoader\n", - "from torch.utils.data import TensorDataset\n", - "from transformers import DistilBertForSequenceClassification, DistilBertTokenizer\n", - "\n", - "nltk.download('stopwords')\n", - "\n", - "import giskard" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d7cb9261", - "metadata": {}, - "source": [ - "## Import data" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "e3c3e6a5", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:50:55.338261Z", - "start_time": "2023-08-21T14:50:55.226417Z" - } - }, - "outputs": [], - "source": [ - "# Define constants\n", - "TEXT_COLUMN_NAME = \"Review\"\n", - "TARGET_COLUMN_NAME = \"label\"\n", - "MAX_NUM_ROWS = 1000\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/tripadvisor_reviews_dataset/{}\"\n", - "DATA_PATH = Path.home() / \".giskard\" / \"tripadvisor_reviews_dataset\"\n", - "DATA_FILE_NAME = \"tripadvisor_hotel_reviews.csv\"\n", - "\n", - "\n", - "# Define data download and pre-processing functions\n", - "\n", - "def fetch_from_ftp(url: str, file: Path) -> None:\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " urlretrieve(url, file)\n", - "\n", - "\n", - "def create_label(x: int) -> int:\n", - " \"\"\"Map rating to the label.\"\"\"\n", - " if x in [1, 2]:\n", - " return 0\n", - " if x == 3:\n", - " return 1\n", - " if x in [4, 5]:\n", - " return 2\n", - "\n", - "\n", - 
"class TextCleaner:\n", - " \"\"\"Helper class to preprocess review's text.\"\"\"\n", - "\n", - " def __init__(self, clean_pattern: str = r\"[^A-ZĞÜŞİÖÇIa-zğüı'şöç0-9.\\\"',()]\"):\n", - " \"\"\"Constructor of the class.\"\"\"\n", - " self.clean_pattern = clean_pattern\n", - "\n", - " def __call__(self, text: Union[str, list]) -> List[List[str]]:\n", - " \"\"\"Perform cleaning.\"\"\"\n", - " if isinstance(text, str):\n", - " docs = [[text]]\n", - "\n", - " if isinstance(text, list):\n", - " docs = text\n", - "\n", - " text = [[re.sub(self.clean_pattern, \" \", sentence) for sentence in sentences] for sentences in docs]\n", - " return text\n", - "\n", - "\n", - "def remove_emoji(data: str) -> str:\n", - " \"\"\"Remove emoji from the text.\"\"\"\n", - " emoji = re.compile(\n", - " \"[\"\n", - " u\"\\U0001F600-\\U0001F64F\"\n", - " u\"\\U0001F300-\\U0001F5FF\"\n", - " u\"\\U0001F680-\\U0001F6FF\"\n", - " u\"\\U0001F1E0-\\U0001F1FF\"\n", - " u\"\\U00002500-\\U00002BEF\"\n", - " u\"\\U00002702-\\U000027B0\"\n", - " u\"\\U00002702-\\U000027B0\"\n", - " u\"\\U000024C2-\\U0001F251\"\n", - " u\"\\U0001f926-\\U0001f937\"\n", - " u\"\\U00010000-\\U0010ffff\"\n", - " u\"\\u2640-\\u2642\"\n", - " u\"\\u2600-\\u2B55\"\n", - " u\"\\u200d\"\n", - " u\"\\u23cf\"\n", - " u\"\\u23e9\"\n", - " u\"\\u231a\"\n", - " u\"\\ufe0f\"\n", - " u\"\\u3030\"\n", - " \"]+\",\n", - " re.UNICODE,\n", - " )\n", - " return re.sub(emoji, '', data)\n", - "\n", - "\n", - "regex = re.compile('[%s]' % re.escape(string.punctuation))\n", - "\n", - "\n", - "def remove_punctuation(text: str) -> str:\n", - " \"\"\"Remove punctuation from the text.\"\"\"\n", - " text = regex.sub(\" \", text)\n", - " return text\n", - "\n", - "\n", - "text_cleaner = TextCleaner()\n", - "\n", - "\n", - "def text_preprocessor(df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"Preprocess text.\"\"\"\n", - " # Remove emoji.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: remove_emoji(x))\n", - "\n", - " # Lower.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: x.lower())\n", - "\n", - " # Clean.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: text_cleaner(x)[0][0])\n", - "\n", - " # Remove punctuation.\n", - " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: remove_punctuation(x))\n", - "\n", - " return df\n", - "\n", - "\n", - "def load_dataset() -> pd.DataFrame:\n", - " # Download dataset\n", - " fetch_from_ftp(DATA_URL.format(DATA_FILE_NAME), DATA_PATH / DATA_FILE_NAME)\n", - " df = pd.read_csv(DATA_PATH / DATA_FILE_NAME, nrows=MAX_NUM_ROWS)\n", - " # Obtain labels for our task.\n", - " df[TARGET_COLUMN_NAME] = df.Rating.apply(lambda x: create_label(x))\n", - " df.drop(columns=\"Rating\", inplace=True)\n", - " df = text_preprocessor(df)\n", - " return df" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "47dd12de", - "metadata": {}, - "source": [ - "### Load and wrap your dataset into Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "8223c2c2", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:50:58.259826Z", - "start_time": "2023-08-21T14:50:58.077311Z" - } - }, - "outputs": [], - "source": [ - "df = load_dataset()\n", - "dataset = giskard.Dataset(\n", - " df=df, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=TARGET_COLUMN_NAME, # Ground truth variable.\n", - " name=\"trip_advisor_reviews_sentiment\", # Optional.\n", - ")" - 
] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "803e36d9", - "metadata": {}, - "source": [ - "## Create your model & wrap it into Giskard" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "5b964cd7", - "metadata": {}, - "source": [ - "### Create your model from HuggingFace" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "35761fc5", - "metadata": {}, - "outputs": [], - "source": [ - "# Constants\n", - "PRETRAINED_WEIGHTS_NAME = \"distilbert-base-uncased\"\n", - "STOP_WORDS = set(stopwords.words('english'))\n", - "RANDOM_SEED = 0\n", - "\n", - "# Set random seeds\n", - "random.seed(RANDOM_SEED)\n", - "np.random.seed(RANDOM_SEED)\n", - "torch.manual_seed(RANDOM_SEED)\n", - "torch.cuda.manual_seed_all(RANDOM_SEED)\n", - "\n", - "\n", - "@dataclass\n", - "class Config:\n", - " \"\"\"Configuration of Distill-BERT model.\"\"\"\n", - "\n", - " device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", - " batch_size = 128\n", - " seq_length = 150\n", - " add_special_tokens = True\n", - " return_attention_mask = True\n", - " pad_to_max_length = True\n", - " return_tensors = 'pt'\n", - "\n", - "\n", - "# Load tokenizer.\n", - "tokenizer = DistilBertTokenizer.from_pretrained(PRETRAINED_WEIGHTS_NAME)\n", - "\n", - "# Load model.\n", - "model = DistilBertForSequenceClassification.from_pretrained(\n", - " PRETRAINED_WEIGHTS_NAME, num_labels=3, output_attentions=False, output_hidden_states=False\n", - ").to(Config.device)\n", - "\n", - "\n", - "def create_dataloader(df: pd.DataFrame) -> DataLoader:\n", - " \"\"\"Create dataloader object with input data.\"\"\"\n", - "\n", - " def _create_dataset(encoded_data: dict) -> TensorDataset:\n", - " \"\"\"Create dataset object with input data.\"\"\"\n", - " input_ids = encoded_data['input_ids']\n", - " attention_masks = encoded_data['attention_mask']\n", - " return TensorDataset(input_ids, attention_masks)\n", - "\n", - " # Tokenize data.\n", - " encoded_data = tokenizer.batch_encode_plus(\n", - " df.Review.values,\n", - " add_special_tokens=Config.add_special_tokens,\n", - " return_attention_mask=Config.return_attention_mask,\n", - " pad_to_max_length=Config.pad_to_max_length,\n", - " max_length=Config.seq_length,\n", - " return_tensors=Config.return_tensors,\n", - " )\n", - "\n", - " # Create dataset object.\n", - " dataset = _create_dataset(encoded_data)\n", - "\n", - " # Create and return dataloader object.\n", - " return DataLoader(dataset, batch_size=Config.batch_size)\n", - "\n", - "\n", - "def infer_predictions(_model: torch.nn.Module, _dataloader: DataLoader) -> np.ndarray:\n", - " \"\"\"Perform inference using given model on given dataloader.\"\"\"\n", - " _model.eval()\n", - "\n", - " y_pred = list()\n", - " for batch in _dataloader:\n", - " batch = tuple(b.to(Config.device) for b in batch)\n", - " inputs = {'input_ids': batch[0], 'attention_mask': batch[1]}\n", - "\n", - " with torch.no_grad():\n", - " outputs = _model(**inputs)\n", - "\n", - " probs = torch.nn.functional.softmax(outputs.logits).detach().cpu().numpy()\n", - " y_pred.append(probs)\n", - "\n", - " y_pred = np.concatenate(y_pred, axis=0)\n", - " return y_pred\n", - "\n", - "\n", - "text_cleaner = TextCleaner()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b22fed46", - "metadata": {}, - "source": [ - "### Wrap your model in Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ccf8aa11", - "metadata": { - "ExecuteTime": { - "end_time": 
"2023-08-21T14:08:53.103877Z", - "start_time": "2023-08-21T14:08:53.050520Z" - } - }, - "outputs": [], - "source": [ - "class GiskardModelCustomWrapper(giskard.Model):\n", - " \"\"\"Custom giskard model wrapper.\"\"\"\n", - "\n", - " def model_predict(self, df: pd.DataFrame) -> np.ndarray:\n", - " \"\"\"Perform inference using overwritten prediction logic.\"\"\"\n", - " cleaned_df = text_preprocessor(df)\n", - " data_loader = create_dataloader(cleaned_df)\n", - " predicted_probabilities = infer_predictions(self.model, data_loader)\n", - " return predicted_probabilities" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "931c5a5c", - "metadata": {}, - "outputs": [], - "source": [ - "model = GiskardModelCustomWrapper(\n", - " model=model, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"trip_advisor_sentiment_classifier\", # Optional.\n", - " classification_labels=[0, 1, 2], # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=[TEXT_COLUMN_NAME], # Default: all columns of your dataset.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "4d589fa4146c6b54", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan your model to find vulnerabilities\n", - "\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "931b100b", - "metadata": {}, - "outputs": [], - "source": [ - "results = giskard.scan(model, dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "ecb49fa5", - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:37:10.046888Z", - "start_time": "2023-08-21T14:37:09.851822Z" - } - }, - "outputs": [ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "990eccb8", + "metadata": {}, + "source": [ + "# Tripadvisor reviews sentiment classification [HuggingFace]\n", + "\n", + "
\n", + "What is Giskard ?\n", + "\n", + "Giskard is an open-source testing framework dedicated to ML models, ranging from tabular to LLM. [To know more about Giskard, click here](https://docs.giskard.ai/en/latest/getting-started/index.html).\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "40f23d1a", + "metadata": {}, + "source": [ + "By running this notebook, you'll create a whole test suite in a few lines of code. The model used here is a BERT model for text classification. It is used to predict the sentiment of tripadvisor reviews (dataset from Kaggle)\n", + "\n", + "You'll learn how to:\n", + "\n", + "- Detect vulnerabilities by scanning the model\n", + "\n", + "- Generate a test suite with domain-specific tests\n", + "\n", + "- Customize your test suite by loading a test from the Giskard catalog\n", + "\n", + "- Upload your model to the Giskard server to:\n", + "\n", + " - Compare models to decide which one to promote\n", + "\n", + " - Debug your tests to diagnose issues\n", + "\n", + " - Share your results and collect business feedback from your team\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "da9a6eac", + "metadata": {}, + "source": [ + "## Install Giskard\n", + "\n", + "To see the list of Python requirements, please refer to [the documentation](https://docs.giskard.ai/en/latest/guides/installation_library/index.html)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "904bb40c24cd2d02", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "58613d7d", + "metadata": {}, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d960163", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import os\n", + "import random\n", + "import re\n", + "import string\n", + "from dataclasses import dataclass\n", + "from pathlib import Path\n", + "from typing import Union, List\n", + "from urllib.request import urlretrieve\n", + "\n", + "import nltk\n", + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "from nltk.corpus import stopwords\n", + "from torch.utils.data import DataLoader\n", + "from torch.utils.data import TensorDataset\n", + "from transformers import DistilBertForSequenceClassification, DistilBertTokenizer\n", + "\n", + "nltk.download('stopwords')\n", + "\n", + "import giskard" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d7cb9261", + "metadata": {}, + "source": [ + "## Import data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e3c3e6a5", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:50:55.338261Z", + "start_time": "2023-08-21T14:50:55.226417Z" + } + }, + "outputs": [], + "source": [ + "# Define constants\n", + "TEXT_COLUMN_NAME = \"Review\"\n", + "TARGET_COLUMN_NAME = \"label\"\n", + "MAX_NUM_ROWS = 1000\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/tripadvisor_reviews_dataset/{}\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"tripadvisor_reviews_dataset\"\n", + "DATA_FILE_NAME = \"tripadvisor_hotel_reviews.csv\"\n", + "\n", + "\n", + "# Define data download and pre-processing functions\n", + "\n", + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " if not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " urlretrieve(url, file)\n", + "\n", + "\n", + "def create_label(x: int) -> int:\n", + " \"\"\"Map rating to the label.\"\"\"\n", + " if x in [1, 2]:\n", + " return 0\n", + " if x == 3:\n", + " return 1\n", + " if x in [4, 5]:\n", + " return 2\n", + "\n", + "\n", + 
"class TextCleaner:\n", + " \"\"\"Helper class to preprocess review's text.\"\"\"\n", + "\n", + " def __init__(self, clean_pattern: str = r\"[^A-ZĞÜŞİÖÇIa-zğüı'şöç0-9.\\\"',()]\"):\n", + " \"\"\"Constructor of the class.\"\"\"\n", + " self.clean_pattern = clean_pattern\n", + "\n", + " def __call__(self, text: Union[str, list]) -> List[List[str]]:\n", + " \"\"\"Perform cleaning.\"\"\"\n", + " if isinstance(text, str):\n", + " docs = [[text]]\n", + "\n", + " if isinstance(text, list):\n", + " docs = text\n", + "\n", + " text = [[re.sub(self.clean_pattern, \" \", sentence) for sentence in sentences] for sentences in docs]\n", + " return text\n", + "\n", + "\n", + "def remove_emoji(data: str) -> str:\n", + " \"\"\"Remove emoji from the text.\"\"\"\n", + " emoji = re.compile(\n", + " \"[\"\n", + " u\"\\U0001F600-\\U0001F64F\"\n", + " u\"\\U0001F300-\\U0001F5FF\"\n", + " u\"\\U0001F680-\\U0001F6FF\"\n", + " u\"\\U0001F1E0-\\U0001F1FF\"\n", + " u\"\\U00002500-\\U00002BEF\"\n", + " u\"\\U00002702-\\U000027B0\"\n", + " u\"\\U00002702-\\U000027B0\"\n", + " u\"\\U000024C2-\\U0001F251\"\n", + " u\"\\U0001f926-\\U0001f937\"\n", + " u\"\\U00010000-\\U0010ffff\"\n", + " u\"\\u2640-\\u2642\"\n", + " u\"\\u2600-\\u2B55\"\n", + " u\"\\u200d\"\n", + " u\"\\u23cf\"\n", + " u\"\\u23e9\"\n", + " u\"\\u231a\"\n", + " u\"\\ufe0f\"\n", + " u\"\\u3030\"\n", + " \"]+\",\n", + " re.UNICODE,\n", + " )\n", + " return re.sub(emoji, '', data)\n", + "\n", + "\n", + "regex = re.compile('[%s]' % re.escape(string.punctuation))\n", + "\n", + "\n", + "def remove_punctuation(text: str) -> str:\n", + " \"\"\"Remove punctuation from the text.\"\"\"\n", + " text = regex.sub(\" \", text)\n", + " return text\n", + "\n", + "\n", + "text_cleaner = TextCleaner()\n", + "\n", + "\n", + "def text_preprocessor(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"Preprocess text.\"\"\"\n", + " # Remove emoji.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: remove_emoji(x))\n", + "\n", + " # Lower.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: x.lower())\n", + "\n", + " # Clean.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: text_cleaner(x)[0][0])\n", + "\n", + " # Remove punctuation.\n", + " df[TEXT_COLUMN_NAME] = df[TEXT_COLUMN_NAME].apply(lambda x: remove_punctuation(x))\n", + "\n", + " return df\n", + "\n", + "\n", + "def load_dataset() -> pd.DataFrame:\n", + " # Download dataset\n", + " fetch_from_ftp(DATA_URL.format(DATA_FILE_NAME), DATA_PATH / DATA_FILE_NAME)\n", + " df = pd.read_csv(DATA_PATH / DATA_FILE_NAME, nrows=MAX_NUM_ROWS)\n", + " # Obtain labels for our task.\n", + " df[TARGET_COLUMN_NAME] = df.Rating.apply(lambda x: create_label(x))\n", + " df.drop(columns=\"Rating\", inplace=True)\n", + " df = text_preprocessor(df)\n", + " return df" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "47dd12de", + "metadata": {}, + "source": [ + "### Load and wrap your dataset into Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8223c2c2", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:50:58.259826Z", + "start_time": "2023-08-21T14:50:58.077311Z" + } + }, + "outputs": [], + "source": [ + "df = load_dataset()\n", + "dataset = giskard.Dataset(\n", + " df=df, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=TARGET_COLUMN_NAME, # Ground truth variable.\n", + " name=\"trip_advisor_reviews_sentiment\", # Optional.\n", + ")" + 
] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "803e36d9", + "metadata": {}, + "source": [ + "## Create your model & wrap it into Giskard" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "5b964cd7", + "metadata": {}, + "source": [ + "### Create your model from HuggingFace" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35761fc5", + "metadata": {}, + "outputs": [], + "source": [ + "# Constants\n", + "PRETRAINED_WEIGHTS_NAME = \"distilbert-base-uncased\"\n", + "STOP_WORDS = set(stopwords.words('english'))\n", + "RANDOM_SEED = 0\n", + "\n", + "# Set random seeds\n", + "random.seed(RANDOM_SEED)\n", + "np.random.seed(RANDOM_SEED)\n", + "torch.manual_seed(RANDOM_SEED)\n", + "torch.cuda.manual_seed_all(RANDOM_SEED)\n", + "\n", + "\n", + "@dataclass\n", + "class Config:\n", + " \"\"\"Configuration of Distill-BERT model.\"\"\"\n", + "\n", + " device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", + " batch_size = 128\n", + " seq_length = 150\n", + " add_special_tokens = True\n", + " return_attention_mask = True\n", + " pad_to_max_length = True\n", + " return_tensors = 'pt'\n", + "\n", + "\n", + "# Load tokenizer.\n", + "tokenizer = DistilBertTokenizer.from_pretrained(PRETRAINED_WEIGHTS_NAME)\n", + "\n", + "# Load model.\n", + "model = DistilBertForSequenceClassification.from_pretrained(\n", + " PRETRAINED_WEIGHTS_NAME, num_labels=3, output_attentions=False, output_hidden_states=False\n", + ").to(Config.device)\n", + "\n", + "\n", + "def create_dataloader(df: pd.DataFrame) -> DataLoader:\n", + " \"\"\"Create dataloader object with input data.\"\"\"\n", + "\n", + " def _create_dataset(encoded_data: dict) -> TensorDataset:\n", + " \"\"\"Create dataset object with input data.\"\"\"\n", + " input_ids = encoded_data['input_ids']\n", + " attention_masks = encoded_data['attention_mask']\n", + " return TensorDataset(input_ids, attention_masks)\n", + "\n", + " # Tokenize data.\n", + " encoded_data = tokenizer.batch_encode_plus(\n", + " df.Review.values,\n", + " add_special_tokens=Config.add_special_tokens,\n", + " return_attention_mask=Config.return_attention_mask,\n", + " pad_to_max_length=Config.pad_to_max_length,\n", + " max_length=Config.seq_length,\n", + " return_tensors=Config.return_tensors,\n", + " )\n", + "\n", + " # Create dataset object.\n", + " dataset = _create_dataset(encoded_data)\n", + "\n", + " # Create and return dataloader object.\n", + " return DataLoader(dataset, batch_size=Config.batch_size)\n", + "\n", + "\n", + "def infer_predictions(_model: torch.nn.Module, _dataloader: DataLoader) -> np.ndarray:\n", + " \"\"\"Perform inference using given model on given dataloader.\"\"\"\n", + " _model.eval()\n", + "\n", + " y_pred = list()\n", + " for batch in _dataloader:\n", + " batch = tuple(b.to(Config.device) for b in batch)\n", + " inputs = {'input_ids': batch[0], 'attention_mask': batch[1]}\n", + "\n", + " with torch.no_grad():\n", + " outputs = _model(**inputs)\n", + "\n", + " probs = torch.nn.functional.softmax(outputs.logits).detach().cpu().numpy()\n", + " y_pred.append(probs)\n", + "\n", + " y_pred = np.concatenate(y_pred, axis=0)\n", + " return y_pred\n", + "\n", + "\n", + "text_cleaner = TextCleaner()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "b22fed46", + "metadata": {}, + "source": [ + "### Wrap your model in Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ccf8aa11", + "metadata": { + "ExecuteTime": { + "end_time": 
"2023-08-21T14:08:53.103877Z", + "start_time": "2023-08-21T14:08:53.050520Z" + } + }, + "outputs": [], + "source": [ + "class GiskardModelCustomWrapper(giskard.Model):\n", + " \"\"\"Custom giskard model wrapper.\"\"\"\n", + "\n", + " def model_predict(self, df: pd.DataFrame) -> np.ndarray:\n", + " \"\"\"Perform inference using overwritten prediction logic.\"\"\"\n", + " cleaned_df = text_preprocessor(df)\n", + " data_loader = create_dataloader(cleaned_df)\n", + " predicted_probabilities = infer_predictions(self.model, data_loader)\n", + " return predicted_probabilities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "931c5a5c", + "metadata": {}, + "outputs": [], + "source": [ + "model = GiskardModelCustomWrapper(\n", + " model=model, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"trip_advisor_sentiment_classifier\", # Optional.\n", + " classification_labels=[0, 1, 2], # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=[TEXT_COLUMN_NAME], # Default: all columns of your dataset.\n", + ")" + ] + }, { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "markdown", + "id": "4d589fa4146c6b54", + "metadata": { + "collapsed": false + }, + "source": [ + "## Scan your model to find vulnerabilities\n", + "\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "931b100b", + "metadata": {}, + "outputs": [], + "source": [ + "results = giskard.scan(model, dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ecb49fa5", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:37:10.046888Z", + "start_time": "2023-08-21T14:37:09.851822Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results) # in your notebook" + ] + }, + { + "cell_type": "markdown", + "id": "8b3343fa", + "metadata": {}, + "source": [ + "As you see above, the model may detect various vulnerabilites by displaying:\n", + "\n", + "* Data slices showing unperformance, underconfidence, overconfidence or spurious correlations\n", + "* Data transformations creating robutness or ethical issues\n", + "* Examples making some tests fail\n" + ] + }, + { + "cell_type": "markdown", + "id": "0667bdc9", + "metadata": {}, + "source": [ + "## Generate a test suite from the Scan\n", + "\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the [Test your ML Model](https://docs.giskard.ai/en/latest/guides/test-suite/index.html) page." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bea736a9", + "metadata": {}, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "\n", + "# You can run the test suite locally to verify that it reproduces the issues\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "id": "882f4638", + "metadata": {}, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog" + ] + }, + { + "cell_type": "markdown", + "id": "502a0767", + "metadata": {}, + "source": [ + "The Giskard open source catalog will enable to load:\n", + "\n", + "* **Tests** such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* **Slicing functions** such as detectors of toxicity, hate, emotion, etc\n", + "* **Transformation functions** such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test ([test_right_label](https://docs.giskard.ai/en/latest/reference/tests/statistic.html#giskard.testing.test_right_label)) that checks if a given row (the first example) has the right label. For more examples of tests and functions, refer to the [Giskard catalog](https://docs.giskard.ai/en/latest/guides/catalog/index.html)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0c3a076", + "metadata": {}, + "outputs": [], + "source": [ + "# For the test_right_label test we are adding, all the parameters are specified except model\n", + "# This means that we will need to specify model everytime we run the suite: model is a global parameter of the suite\n", + "suite = test_suite \\\n", + " .add_test(\n", + " giskard.testing.test_right_label(dataset=dataset.iloc[[1]], classification_label=\"yes\", threshold=1)).run()" + ] + }, + { + "cell_type": "markdown", + "id": "cf824254", + "metadata": {}, + "source": [ + "## Upload your suite to the Giskard server\n", + "\n", + "
\n", + "Install Giskard Server\n", + "\n", + "To upload your suite to the Giskard Server you must first run the Giskard Server. Refer to the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html).\n", + "
\n", + "\n", + "Upload your suite to the Giskard server to:\n", + "\n", + "- Compare models to decide which model to promote\n", + "- Debug your tests to diagnose the issues\n", + "- Create more domain-specific tests that are integrating business feedback\n", + "- Share your results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8efd6bf3", + "metadata": {}, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "client = giskard.GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "id": "639f0c2d048805be", + "metadata": { + "collapsed": false + }, + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(results) # in your notebook" - ] - }, - { - "cell_type": "markdown", - "id": "8b3343fa", - "metadata": {}, - "source": [ - "As you see above, the model may detect various vulnerabilites by displaying:\n", - "\n", - "* Data slices showing unperformance, underconfidence, overconfidence or spurious correlations\n", - "* Data transformations creating robutness or ethical issues\n", - "* Examples making some tests fail\n" - ] - }, - { - "cell_type": "markdown", - "id": "0667bdc9", - "metadata": {}, - "source": [ - "## Generate a test suite from the Scan\n", - "\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the [Test your ML Model](https://docs.giskard.ai/en/latest/guides/test-suite/index.html) page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bea736a9", - "metadata": {}, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "\n", - "# You can run the test suite locally to verify that it reproduces the issues\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "id": "882f4638", - "metadata": {}, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog" - ] - }, - { - "cell_type": "markdown", - "id": "502a0767", - "metadata": {}, - "source": [ - "The Giskard open source catalog will enable to load:\n", - "\n", - "* **Tests** such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* **Slicing functions** such as detectors of toxicity, hate, emotion, etc\n", - "* **Transformation functions** such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test ([test_right_label](https://docs.giskard.ai/en/latest/reference/tests/statistic.html#giskard.testing.test_right_label)) that checks if a given row (the first example) has the right label. For more examples of tests and functions, refer to the [Giskard catalog](https://docs.giskard.ai/en/latest/guides/catalog/index.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b0c3a076", - "metadata": {}, - "outputs": [], - "source": [ - "# For the test_right_label test we are adding, all the parameters are specified except model\n", - "# This means that we will need to specify model everytime we run the suite: model is a global parameter of the suite\n", - "suite = test_suite \\\n", - " .add_test(\n", - " giskard.testing.test_right_label(dataset=dataset.iloc[[1]], classification_label=\"yes\", threshold=1)).run()" - ] - }, - { - "cell_type": "markdown", - "id": "cf824254", - "metadata": {}, - "source": [ - "## Upload your suite to the Giskard server\n", - "\n", - "
\n", - "Install Giskard Server\n", - "\n", - "To upload your suite to the Giskard Server you must first run the Giskard Server. Refer to the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html).\n", - "
\n", - "\n", - "Upload your suite to the Giskard server to:\n", - "\n", - "- Compare models to decide which model to promote\n", - "- Debug your tests to diagnose the issues\n", - "- Create more domain-specific tests that are integrating business feedback\n", - "- Share your results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8efd6bf3", - "metadata": {}, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "client = giskard.GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "id": "639f0c2d048805be", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/python-client/docs/reference/notebooks/wage_classification.ipynb b/python-client/docs/reference/notebooks/wage_classification.ipynb index 39f212533f..3708d3325c 100644 --- a/python-client/docs/reference/notebooks/wage_classification.ipynb +++ b/python-client/docs/reference/notebooks/wage_classification.ipynb @@ -1,2440 +1,2440 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "# Wage classification [sklearn]\n", - "* Binary classification to predict whether a person makes over 50K a year or not given their demographic variation.\n", - "* Reference notebook: \n", - "* Dataset: \n", - "\n", - "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a classification model with the adult census dataset and annual salary as a target feature. Feel free to use your own model (tabular, text, or LLM).\n", - "\n", - "You’ll learn how to:\n", - "\n", - "* Detect vulnerabilities by scanning the model\n", - "* Generate a test suite with domain-specific tests\n", - "* Customize your test suite by loading a test from the Giskard catalog\n", - "* Upload your model to the Giskard server to:\n", - "* Compare models to decide which one to promote\n", - "* Debug your tests to diagnose issues\n", - "* Share your results and collect business feedback from your team" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Install Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "!pip install \"giskard>=2.0.0b\" -U" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from urllib.request import urlretrieve\n", - "\n", - "import pandas as pd\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.metrics import accuracy_score\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", - "\n", - "from giskard import Model, Dataset, scan, testing, GiskardClient" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Define constants" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - 
"ExecuteTime": { - "end_time": "2023-08-21T14:38:32.262882Z", - "start_time": "2023-08-21T14:38:32.122012Z" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "# Wage classification [sklearn]\n", + "* Binary classification to predict whether a person makes over 50K a year or not given their demographic variation.\n", + "* Reference notebook: \n", + "* Dataset: \n", + "\n", + "By running this notebook, you’ll create a whole test suite in a few lines of code. The model used here is a classification model with the adult census dataset and annual salary as a target feature. Feel free to use your own model (tabular, text, or LLM).\n", + "\n", + "You’ll learn how to:\n", + "\n", + "* Detect vulnerabilities by scanning the model\n", + "* Generate a test suite with domain-specific tests\n", + "* Customize your test suite by loading a test from the Giskard catalog\n", + "* Upload your model to the Giskard server to:\n", + "* Compare models to decide which one to promote\n", + "* Debug your tests to diagnose issues\n", + "* Share your results and collect business feedback from your team" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# Constants\n", - "RANDOM_SEED = 0\n", - "TEST_RATIO = 0.2\n", - "\n", - "DROP_FEATURES = [\n", - " 'education', \n", - " 'native-country', \n", - " 'occupation',\n", - " 'marital-status',\n", - " 'educational-num'\n", - "]\n", - "\n", - "CATEGORICAL_FEATURES = [\n", - " \"workclass\",\n", - " \"relationship\",\n", - " \"race\",\n", - " \"gender\"\n", - "]\n", - "\n", - "NUMERICAL_FEATURES = [\n", - " \"age\",\n", - " \"fnlwgt\",\n", - " \"capital-gain\",\n", - " \"capital-loss\",\n", - " \"hours-per-week\",\n", - "]\n", - "\n", - "TARGET_COLUMN = \"income\"\n", - "\n", - "# Paths.\n", - "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/wage_classification_dataset/adult.csv\"\n", - "DATA_PATH = Path.home() / \".giskard\" / \"wage_classification_dataset\" / \"adult.csv\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Load and preprocess data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:38:32.263195Z", - "start_time": "2023-08-21T14:38:32.194089Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Install Giskard" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "def fetch_from_ftp(url: str, file: Path) -> None:\n", - " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", - " if not file.parent.exists():\n", - " file.parent.mkdir(parents=True, exist_ok=True)\n", - "\n", - " if not file.exists():\n", - " print(f\"Downloading data from {url}\")\n", - " urlretrieve(url, file)\n", - "\n", - " print(f\"Data was loaded!\")\n", - "\n", - "\n", - "def download_data(**kwargs) -> pd.DataFrame:\n", - " \"\"\"Download the dataset using URL.\"\"\"\n", - " fetch_from_ftp(DATA_URL, DATA_PATH)\n", - " _df = pd.read_csv(DATA_PATH, **kwargs)\n", - " return _df\n", - "\n", - "\n", - "def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:\n", - " # Drop NaNs and columns.\n", - " df = df.dropna()\n", - " df = df.drop(columns=DROP_FEATURES)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:38:32.493968Z", 
- "start_time": "2023-08-21T14:38:32.215060Z" + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!pip install \"giskard>=2.0.0b\" -U" + ] }, - "collapsed": false - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data was loaded!\n" - ] - } - ], - "source": [ - "income_df = download_data()\n", - "income_df = preprocess_data(income_df)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Train-test split" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:38:32.552628Z", - "start_time": "2023-08-21T14:38:32.505438Z" + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Import libraries" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "X_train, X_test, y_train, y_test = train_test_split(income_df.drop(columns=TARGET_COLUMN), income_df[TARGET_COLUMN], \n", - " test_size=TEST_RATIO, random_state=RANDOM_SEED)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap dataset with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:38:32.596732Z", - "start_time": "2023-08-21T14:38:32.531668Z" + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from urllib.request import urlretrieve\n", + "\n", + "import pandas as pd\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "\n", + "from giskard import Model, Dataset, scan, testing, GiskardClient" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "raw_data = pd.concat([X_test, y_test], axis=1)\n", - "wrapped_data = Dataset(\n", - " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", - " target=TARGET_COLUMN, # Ground truth variable.\n", - " name=\"salary_data\", # Optional.\n", - " cat_columns=CATEGORICAL_FEATURES # List of categorical columns. Optional, but is a MUST if available. 
Inferred automatically if not.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Model training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Define preprocessing pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:38:32.626086Z", - "start_time": "2023-08-21T14:38:32.572510Z" + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Define constants" + ] }, - "collapsed": false - }, - "outputs": [], - "source": [ - "preprocessor = ColumnTransformer(transformers=[\n", - " (\"num\", StandardScaler(), NUMERICAL_FEATURES),\n", - " (\"cat\", OneHotEncoder(handle_unknown=\"ignore\", sparse=False), CATEGORICAL_FEATURES),\n", - "])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Build estimator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pipeline = Pipeline(steps=[\n", - " (\"preprocessor\", preprocessor),\n", - " (\"classifier\", RandomForestClassifier())\n", - "])\n", - "\n", - "pipeline.fit(X_train, y_train)\n", - "\n", - "# Accuracy score.\n", - "train_metric = pipeline.score(X_train, y_train)\n", - "test_metric = pipeline.score(X_test, y_test)\n", - "\n", - "print(f'Train accuracy: {train_metric:.2f}')\n", - "print(f'Test accuracy: {test_metric:.2f}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "### Wrap model with Giskard" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "wrapped_model = Model(\n", - " model=pipeline, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", - " model_type=\"classification\", # Either regression, classification or text_generation.\n", - " name=\"salary_cls\", # Optional.\n", - " classification_labels=pipeline.classes_, # Their order MUST be identical to the prediction_function's output order.\n", - " feature_names=X_train.columns # Default: all columns of your dataset.\n", - ")\n", - "\n", - "# Validate wrapped model.\n", - "wrapped_predict = wrapped_model.predict(wrapped_data)\n", - "wrapped_test_metric = accuracy_score(y_test, wrapped_predict.prediction)\n", - "\n", - "print(f'Wrapped Test accuracy: {wrapped_test_metric:.2f}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Scan your model to find vulnerabilities\n", - "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "results = scan(model=wrapped_model, dataset=wrapped_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2023-08-21T14:39:23.756484Z", - "start_time": "2023-08-21T14:39:22.889173Z" + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:38:32.262882Z", + "start_time": "2023-08-21T14:38:32.122012Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "# Constants\n", + "RANDOM_SEED = 0\n", + "TEST_RATIO = 0.2\n", + "\n", + "DROP_FEATURES = [\n", + " 'education', \n", + " 'native-country', \n", + " 'occupation',\n", + " 'marital-status',\n", + " 'educational-num'\n", + "]\n", + "\n", + "CATEGORICAL_FEATURES = [\n", + " \"workclass\",\n", + " \"relationship\",\n", + " \"race\",\n", + " \"gender\"\n", + "]\n", + "\n", + "NUMERICAL_FEATURES = [\n", + " \"age\",\n", + " \"fnlwgt\",\n", + " \"capital-gain\",\n", + " \"capital-loss\",\n", + " \"hours-per-week\",\n", + "]\n", + "\n", + "TARGET_COLUMN = \"income\"\n", + "\n", + "# Paths.\n", + "DATA_URL = \"ftp://sys.giskard.ai/pub/unit_test_resources/wage_classification_dataset/adult.csv\"\n", + "DATA_PATH = Path.home() / \".giskard\" / \"wage_classification_dataset\" / \"adult.csv\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Dataset preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Load and preprocess data" + ] }, - "collapsed": false - }, - "outputs": [ { - "data": { - "text/html": [ - "\n", - "" + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:38:32.263195Z", + "start_time": "2023-08-21T14:38:32.194089Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "def fetch_from_ftp(url: str, file: Path) -> None:\n", + " \"\"\"Helper to fetch data from the FTP server.\"\"\"\n", + " if not file.parent.exists():\n", + " file.parent.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if not file.exists():\n", + " print(f\"Downloading data from {url}\")\n", + " urlretrieve(url, file)\n", + "\n", + " print(f\"Data was loaded!\")\n", + "\n", + "\n", + "def download_data(**kwargs) -> pd.DataFrame:\n", + " \"\"\"Download the dataset using URL.\"\"\"\n", + " fetch_from_ftp(DATA_URL, DATA_PATH)\n", + " _df = pd.read_csv(DATA_PATH, **kwargs)\n", + " return _df\n", + "\n", + "\n", + "def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:\n", + " # Drop NaNs and columns.\n", + " df = df.dropna()\n", + " df = df.drop(columns=DROP_FEATURES)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:38:32.493968Z", + "start_time": "2023-08-21T14:38:32.215060Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data was loaded!\n" + ] + } + ], + "source": [ + "income_df = download_data()\n", + "income_df = preprocess_data(income_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Train-test split" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:38:32.552628Z", + "start_time": "2023-08-21T14:38:32.505438Z" + }, + "collapsed": false + 
}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(income_df.drop(columns=TARGET_COLUMN), income_df[TARGET_COLUMN], \n", + " test_size=TEST_RATIO, random_state=RANDOM_SEED)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap dataset with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:38:32.596732Z", + "start_time": "2023-08-21T14:38:32.531668Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "raw_data = pd.concat([X_test, y_test], axis=1)\n", + "wrapped_data = Dataset(\n", + " df=raw_data, # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).\n", + " target=TARGET_COLUMN, # Ground truth variable.\n", + " name=\"salary_data\", # Optional.\n", + " cat_columns=CATEGORICAL_FEATURES # List of categorical columns. Optional, but is a MUST if available. Inferred automatically if not.\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Model training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Define preprocessing pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:38:32.626086Z", + "start_time": "2023-08-21T14:38:32.572510Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "preprocessor = ColumnTransformer(transformers=[\n", + " (\"num\", StandardScaler(), NUMERICAL_FEATURES),\n", + " (\"cat\", OneHotEncoder(handle_unknown=\"ignore\", sparse=False), CATEGORICAL_FEATURES),\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Build estimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pipeline = Pipeline(steps=[\n", + " (\"preprocessor\", preprocessor),\n", + " (\"classifier\", RandomForestClassifier())\n", + "])\n", + "\n", + "pipeline.fit(X_train, y_train)\n", + "\n", + "# Accuracy score.\n", + "train_metric = pipeline.score(X_train, y_train)\n", + "test_metric = pipeline.score(X_test, y_test)\n", + "\n", + "print(f'Train accuracy: {train_metric:.2f}')\n", + "print(f'Test accuracy: {test_metric:.2f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "### Wrap model with Giskard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "wrapped_model = Model(\n", + " model=pipeline, # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.\n", + " model_type=\"classification\", # Either regression, classification or text_generation.\n", + " name=\"salary_cls\", # Optional.\n", + " classification_labels=pipeline.classes_, # Their order MUST be identical to the prediction_function's output order.\n", + " feature_names=X_train.columns # Default: all columns of your dataset.\n", + ")\n", + "\n", + "# Validate wrapped model.\n", + "wrapped_predict = wrapped_model.predict(wrapped_data)\n", + "wrapped_test_metric = accuracy_score(y_test, wrapped_predict.prediction)\n", + "\n", + "print(f'Wrapped Test accuracy: {wrapped_test_metric:.2f}')" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Scan your model to find vulnerabilities\n", + "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "results = scan(model=wrapped_model, dataset=wrapped_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T14:39:23.756484Z", + "start_time": "2023-08-21T14:39:22.889173Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Generate a test suite from the Scan\n", + "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite = results.generate_test_suite(\"My first test suite\")\n", + "test_suite.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Customize your suite by loading objects from the Giskard catalog\n", + "\n", + "The Giskard open source catalog will enable to load:\n", + "\n", + "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", + "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", + "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", + "\n", + "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", + "# Create a Giskard client after having install the Giskard server (see documentation)\n", + "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "client = GiskardClient(\n", + " url=\"http://localhost:19000\", # URL of your Giskard instance\n", + " token=token\n", + ")\n", + "\n", + "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", + "\n", + "# Upload to the current project ✉️\n", + "test_suite.upload(client, \"my_project\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "
\n", + "Connecting Google Colab with the Giskard server\n", + "\n", + "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", + "\n", + "> giskard server start\n", + "\n", + "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", + "\n", + "> giskard server expose --ngrok-token \\\n", + "\n", + "Read the following [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", + "
" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "display(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Generate a test suite from the Scan\n", - "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite = results.generate_test_suite(\"My first test suite\")\n", - "test_suite.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "## Customize your suite by loading objects from the Giskard catalog\n", - "\n", - "The Giskard open source catalog will enable to load:\n", - "\n", - "* Tests such as metamorphic, performance, prediction & data drift, statistical tests, etc\n", - "* Slicing functions such as detectors of toxicity, hate, emotion, etc\n", - "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", - "\n", - "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", - "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", - "client = GiskardClient(\n", - " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", - ")\n", - "\n", - "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", - "\n", - "# Upload to the current project ✉️\n", - "test_suite.upload(client, \"my_project\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false - }, - "source": [ - "
\n", - "Connecting Google Colab with the Giskard server\n", - "\n", - "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", - "\n", - "> giskard server start\n", - "\n", - "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", - "\n", - "> giskard server expose --token \\\n", - "\n", - "Read the following [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", - "
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/python-client/giskard/commands/cli_server.py b/python-client/giskard/commands/cli_server.py index 72b45a5b62..450b43e9b0 100644 --- a/python-client/giskard/commands/cli_server.py +++ b/python-client/giskard/commands/cli_server.py @@ -541,7 +541,7 @@ def clean(delete_data): @server.command("expose") @click.option( - "--token", + "--ngrok-token", "token", required=True, help="In case you have an ngrok account, you can use a token " From 67506888a5cc020f5569ada20105b0cc6bf1f49d Mon Sep 17 00:00:00 2001 From: Henrique Chaves Date: Thu, 21 Sep 2023 08:44:20 +0200 Subject: [PATCH 2/9] Update HF token cli param --- .../components/StartWorkerInstructions.vue | 28 +++++++++---------- .../docs/guides/installation_app/index.rst | 8 +++--- python-client/giskard/commands/cli_worker.py | 2 -- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/frontend/src/components/StartWorkerInstructions.vue b/frontend/src/components/StartWorkerInstructions.vue index 0e260131ac..ef199864b1 100644 --- a/frontend/src/components/StartWorkerInstructions.vue +++ b/frontend/src/components/StartWorkerInstructions.vue @@ -1,6 +1,6 @@ " + "text/html": [ + "\n", + "" + ] }, "metadata": {}, "output_type": "display_data" @@ -223,9 +3257,9 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "client = giskard.GiskardClient(\n", - " url=\"http://localhost:19000\", token=token\n", + " url=\"http://localhost:19000\", key=key\n", ") # URL of your Giskard instance\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -236,6 +3270,9 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "
\n", "Connecting Google Colab with the Giskard server\n", "\n", "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", "\n", "> giskard server start\n", "\n", "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", "\n", - "> giskard server expose --token \\\n", + "> giskard server expose --ngrok-token \\\n", "\n", "Read the following [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", "
" - ], - "metadata": { - "collapsed": false - } + ] } ], "metadata": { "kernelspec": { - "name": "python3", + "display_name": "Python 3 (ipykernel)", "language": "python", - "display_name": "Python 3 (ipykernel)" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/python-client/docs/reference/notebooks/airline_tweets_sentiment_analysis.ipynb b/python-client/docs/reference/notebooks/airline_tweets_sentiment_analysis.ipynb index 186ecca3a5..8cb7801d98 100644 --- a/python-client/docs/reference/notebooks/airline_tweets_sentiment_analysis.ipynb +++ b/python-client/docs/reference/notebooks/airline_tweets_sentiment_analysis.ipynb @@ -2,6 +2,9 @@ "cells": [ { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "# Airline tweets sentiment analysis [HuggingFace]\n", "\n", @@ -16,79 +19,76 @@ " * Compare models to decide which one to promote\n", " * Debug your tests to diagnose issues\n", " * Share your results and collect business feedback from your team" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "## Install Giskard" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Install Giskard" + ] }, { "cell_type": "code", "execution_count": 1, - "outputs": [], - "source": [ - "!pip install \"giskard>=2.0.0b\" -U" - ], "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-08-22T14:17:15.192026Z", "start_time": "2023-08-22T14:17:15.133010Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "!pip install \"giskard>=2.0.0b\" -U" + ] }, { "cell_type": "markdown", - "source": [ - "## Install other libraries" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Install other libraries" + ] }, { "cell_type": "code", "execution_count": 2, - "outputs": [], - "source": [ - "!pip install --upgrade accelerate" - ], "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-08-22T14:17:15.840873Z", "start_time": "2023-08-22T14:17:15.800112Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "!pip install --upgrade accelerate" + ] }, { "cell_type": "markdown", - "source": [ - "## Import libraries" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Import libraries" + ] }, { "cell_type": "code", "execution_count": 14, "metadata": { - "id": "XA0apbYgxWgg", - "pycharm": { - "name": "#%%\n" - }, "ExecuteTime": { "end_time": "2023-08-22T14:18:47.767265Z", "start_time": "2023-08-22T14:18:47.707042Z" + }, + "id": "XA0apbYgxWgg", + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -107,16 +107,23 @@ }, { "cell_type": "markdown", - "source": [ - "## Define constants" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Define constants" + ] }, { "cell_type": "code", "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T14:17:51.557083Z", + "start_time": "2023-08-22T14:17:51.450235Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "# Constants.\n", @@ -131,36 +138,36 @@ "# Paths.\n", "MODEL_NAME = \"Souvikcmsa/SentimentAnalysisDistillBERT\"\n", "DATA_URL = 'https://raw.githubusercontent.com/Giskard-AI/examples/main/datasets/twitter_us_airline_sentiment_analysis.csv'" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T14:17:51.557083Z", - "start_time": "2023-08-22T14:17:51.450235Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Dataset preparation" - ], "metadata": { "collapsed": false - } + }, + "source": 
[ + "## Dataset preparation" + ] }, { "cell_type": "markdown", - "source": [ - "### Load and preprocess data" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Load and preprocess data" + ] }, { "cell_type": "code", "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T14:17:51.694840Z", + "start_time": "2023-08-22T14:17:51.473691Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "def load_preprocess_data():\n", @@ -171,53 +178,53 @@ "\n", "\n", "data = load_preprocess_data()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T14:17:51.694840Z", - "start_time": "2023-08-22T14:17:51.473691Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "### Train-test split" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Train-test split" + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T14:17:51.695873Z", + "start_time": "2023-08-22T14:17:51.619643Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(data[[TEXT_COLUMN_NAME]], \n", " data[TARGET_COLUMN_NAME], \n", " random_state=RANDOM_SEED)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T14:17:51.695873Z", - "start_time": "2023-08-22T14:17:51.619643Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "### Wrap dataset with Giskard" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Wrap dataset with Giskard" + ] }, { "cell_type": "code", "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T14:17:51.696133Z", + "start_time": "2023-08-22T14:17:51.643869Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "raw_data = pd.concat([X_test, y_test.map(TARGET_INT_STR)], axis=1)\n", @@ -226,36 +233,36 @@ " target=TARGET_COLUMN_NAME, # Ground truth variable\n", " name=\"Tweets sentiment dataset\" # Optional\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T14:17:51.696133Z", - "start_time": "2023-08-22T14:17:51.643869Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Model training" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Model training" + ] }, { "cell_type": "markdown", - "source": [ - "### Define 'torch.Dataset' objects." - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Define 'torch.Dataset' objects." 
+ ] }, { "cell_type": "code", "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T14:17:53.944150Z", + "start_time": "2023-08-22T14:17:51.667196Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "class CustomDataset(TorchDataset):\n", @@ -283,35 +290,28 @@ "\n", "train_dataset = CustomDataset(X_train_tokenized, y_train.values.tolist())\n", "val_dataset = CustomDataset(X_test_tokenized, y_test.values.tolist())" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T14:17:53.944150Z", - "start_time": "2023-08-22T14:17:51.667196Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "### Define model" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Define model" + ] }, { "cell_type": "code", "execution_count": 9, "metadata": { - "id": "5oseyfAwQTdr", - "pycharm": { - "name": "#%%\n" - }, "ExecuteTime": { "end_time": "2023-08-22T14:17:56.952666Z", "start_time": "2023-08-22T14:17:53.945664Z" + }, + "id": "5oseyfAwQTdr", + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -325,24 +325,24 @@ }, { "cell_type": "markdown", - "source": [ - "### Define trainer object" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Define trainer object" + ] }, { "cell_type": "code", "execution_count": 10, "metadata": { - "id": "eO67ZdIBekE5", - "pycharm": { - "name": "#%%\n" - }, "ExecuteTime": { "end_time": "2023-08-22T14:18:09.366029Z", "start_time": "2023-08-22T14:18:09.240468Z" + }, + "id": "eO67ZdIBekE5", + "pycharm": { + "name": "#%%\n" } }, "outputs": [], @@ -381,24 +381,24 @@ }, { "cell_type": "markdown", - "source": [ - "### Train and evaluate model" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Train and evaluate model" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "trainer.train()\n", "trainer.evaluate()" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", @@ -463,21 +463,1615 @@ { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "results = scan(wrapped_model, wrapped_data)" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 19, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T14:23:48.097505Z", + "start_time": "2023-08-22T14:23:47.605180Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/html": "\n" + "text/html": [ + "\n", + "" + ] }, "metadata": {}, "output_type": "display_data" @@ -485,39 +2079,35 @@ ], "source": [ "display(results)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T14:23:48.097505Z", - "start_time": "2023-08-22T14:23:47.605180Z" - } - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Generate a test suite from the Scan\n", "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." 
- ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "test_suite = results.generate_test_suite(\"My first test suite\")\n", "test_suite.run()" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Customize your suite by loading objects from the Giskard catalog\n", "\n", @@ -528,24 +2118,24 @@ "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", "\n", "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Upload your suite to the Giskard server\n", "\n", @@ -555,35 +2145,35 @@ "* Debug your tests to diagnose the issues\n", "* Create more domain-specific tests that are integrating business feedback\n", "* Share your results" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", "\n", "# Upload to the current project ✉️\n", "test_suite.upload(client, \"my_project\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "
\n", "Connecting Google Colab with the Giskard server\n", "\n", "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", "\n", "> giskard server start\n", "\n", "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", "\n", - "> giskard server expose --token \\\n", + "> giskard server expose --ngrok-token \\\n", "\n", "Read the following [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", "
" - ], - "metadata": { - "collapsed": false - } + ] } ], "metadata": { diff --git a/python-client/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb b/python-client/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb index 41f9c0873a..3728886d29 100644 --- a/python-client/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/amazon_review_classification_sklearn.ipynb @@ -2643,10 +2643,10 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -2698,4 +2698,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/cancer_detection_xgboost.ipynb b/python-client/docs/reference/notebooks/cancer_detection_xgboost.ipynb index 2a0b941537..cd14de0fb1 100644 --- a/python-client/docs/reference/notebooks/cancer_detection_xgboost.ipynb +++ b/python-client/docs/reference/notebooks/cancer_detection_xgboost.ipynb @@ -54,18 +54,18 @@ { "cell_type": "code", "execution_count": 13, + "id": "eb828d6da954f51d", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-08-21T11:49:35.078459Z", "start_time": "2023-08-21T11:49:35.038489Z" - } + }, + "collapsed": false }, "outputs": [], "source": [ "pip install \"giskard>=2.0.0b\" -U" - ], - "id": "eb828d6da954f51d" + ] }, { "attachments": {}, @@ -206,6 +206,7 @@ }, { "cell_type": "markdown", + "id": "4fa6a666db37d7af", "metadata": { "collapsed": false }, @@ -213,8 +214,7 @@ "## Scan your model to find vulnerabilities\n", "\n", "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." - ], - "id": "4fa6a666db37d7af" + ] }, { "cell_type": "code", @@ -239,7 +239,2473 @@ "outputs": [ { "data": { - "text/html": "\n" + "text/html": [ + "\n", + "" + ] }, "metadata": {}, "output_type": "display_data" @@ -350,10 +2816,10 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "client = giskard.GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -364,6 +2830,10 @@ }, { "cell_type": "markdown", + "id": "193983c206c0103f", + "metadata": { + "collapsed": false + }, "source": [ "
\n", "Connecting Google Colab with the Giskard server\n", "\n", "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", "\n", "> giskard server start\n", "\n", "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", "\n", "> giskard server expose --ngrok-token \\\n", "\n", "Read the following [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", "
" - ], - "metadata": { - "collapsed": false - }, - "id": "193983c206c0103f" + ] } ], "metadata": { @@ -406,4 +2872,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/credit_scoring.ipynb b/python-client/docs/reference/notebooks/credit_scoring.ipynb index 40a429e08a..55675d349a 100644 --- a/python-client/docs/reference/notebooks/credit_scoring.ipynb +++ b/python-client/docs/reference/notebooks/credit_scoring.ipynb @@ -2,6 +2,9 @@ "cells": [ { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "# German credit scoring [sklearn]\n", "\n", @@ -15,53 +18,50 @@ " * Compare models to decide which one to promote\n", " * Debug your tests to diagnose issues\n", " * Share your results and collect business feedback from your team" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "## Install Giskard" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Install Giskard" + ] }, { "cell_type": "code", "execution_count": 22, - "outputs": [], - "source": [ - "pip install \"giskard>=2.0.0b\" -U" - ], "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-08-21T12:06:20.563432Z", "start_time": "2023-08-21T12:06:20.538551Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "pip install \"giskard>=2.0.0b\" -U" + ] }, { "cell_type": "markdown", - "source": [ - "## Import libraries" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Import libraries" + ] }, { "cell_type": "code", "execution_count": 23, "metadata": { - "collapsed": true, "ExecuteTime": { "end_time": "2023-08-21T12:06:21.107499Z", "start_time": "2023-08-21T12:06:21.091114Z" - } + }, + "collapsed": true }, "outputs": [], "source": [ @@ -80,16 +80,23 @@ }, { "cell_type": "markdown", - "source": [ - "## Define constants" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Define constants" + ] }, { "cell_type": "code", "execution_count": 24, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:06:21.529881Z", + "start_time": "2023-08-21T12:06:21.514699Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "# Constants.\n", @@ -124,85 +131,85 @@ "\n", "# Paths.\n", "DATA_URL = \"https://raw.githubusercontent.com/Giskard-AI/giskard-examples/main/datasets/credit_scoring_classification_model_dataset/german_credit_prepared.csv\"" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:21.529881Z", - "start_time": "2023-08-21T12:06:21.514699Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Dataset preparation" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Dataset preparation" + ] }, { "cell_type": "markdown", - "source": [ - "### Load data" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Load data" + ] }, { "cell_type": "code", "execution_count": 25, - "outputs": [], - "source": [ - "df = pd.read_csv(DATA_URL, keep_default_na=False, na_values=[\"_GSK_NA_\"])" - ], "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-08-21T12:06:22.406745Z", "start_time": "2023-08-21T12:06:22.141476Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "df = pd.read_csv(DATA_URL, keep_default_na=False, na_values=[\"_GSK_NA_\"])" + ] }, { "cell_type": "markdown", - "source": [ - "### Train-test split" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Train-test split" + ] }, { 
"cell_type": "code", "execution_count": 26, - "outputs": [], - "source": [ - "X_train, X_test, Y_train, Y_test = train_test_split(df.drop(columns=TARGET_COLUMN_NAME), df[TARGET_COLUMN_NAME],\n", - " test_size=0.2, random_state=0, stratify=df[TARGET_COLUMN_NAME])" - ], "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-08-21T12:06:22.588559Z", "start_time": "2023-08-21T12:06:22.568863Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(df.drop(columns=TARGET_COLUMN_NAME), df[TARGET_COLUMN_NAME],\n", + " test_size=0.2, random_state=0, stratify=df[TARGET_COLUMN_NAME])" + ] }, { "cell_type": "markdown", - "source": [ - "### Wrap dataset with Giskard" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Wrap dataset with Giskard" + ] }, { "cell_type": "code", "execution_count": 27, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:06:23.000841Z", + "start_time": "2023-08-21T12:06:22.975202Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "raw_data = pd.concat([X_test, Y_test], axis=1)\n", @@ -212,36 +219,36 @@ " name='German credit scoring dataset', # Optional.\n", " cat_columns=COLUMNS_TO_ENCODE # List of categorical columns. Optional, but is a MUST if available. Inferred automatically if not.\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:23.000841Z", - "start_time": "2023-08-21T12:06:22.975202Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Model training" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Model training" + ] }, { "cell_type": "markdown", - "source": [ - "### Define preprocessing steps" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Define preprocessing steps" + ] }, { "cell_type": "code", "execution_count": 28, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:06:23.767134Z", + "start_time": "2023-08-21T12:06:23.718945Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "numeric_transformer = Pipeline(steps=[\n", @@ -258,27 +265,27 @@ " (\"num\", numeric_transformer, COLUMNS_TO_SCALE),\n", " (\"cat\", categorical_transformer, COLUMNS_TO_ENCODE),\n", "])" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:23.767134Z", - "start_time": "2023-08-21T12:06:23.718945Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "### Build estimator" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Build estimator" + ] }, { "cell_type": "code", "execution_count": 29, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:06:24.530066Z", + "start_time": "2023-08-21T12:06:24.368164Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -315,27 +322,23 @@ "pred_test = pipeline.predict(X_test)\n", "\n", "print(classification_report(Y_test, pred_test))" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:24.530066Z", - "start_time": "2023-08-21T12:06:24.368164Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "### Wrap model with Giskard" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Wrap model with Giskard" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "wrapped_model = Model(\n", @@ -349,39 +352,2371 @@ "\n", "# Validate wrapped model.\n", "print(classification_report(Y_test, 
pipeline.classes_[wrapped_model.predict(wrapped_data).raw_prediction]))" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Scan your model to find vulnerabilities\n", "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. For detailed information about the scan feature, please refer to our scan documentation." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "results = giskard.scan(wrapped_model, wrapped_data)" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 32, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-21T12:06:48.403734Z", + "start_time": "2023-08-21T12:06:47.466032Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/html": "\n" + "text/html": [ + "\n", + "" + ] }, "metadata": {}, "output_type": "display_data" @@ -389,29 +2724,25 @@ ], "source": [ "display(results)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-21T12:06:48.403734Z", - "start_time": "2023-08-21T12:06:47.466032Z" - } - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "test_suite = results.generate_test_suite(\"My first test suite\")\n", "test_suite.run()" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Customize your suite by loading objects from the Giskard catalog\n", "\n", @@ -421,24 +2752,24 @@ "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", "\n", "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
- ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Upload your suite to the Giskard server\n", "\n", @@ -447,36 +2778,36 @@ "* Debug your tests to diagnose the issues\n", "* Create more domain-specific tests that are integrating business feedback\n", "* Share your results" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", "\n", "# Upload to the current project ✉️\n", "test_suite.upload(client, \"my_project\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "
\n", "Connecting Google Colab with the Giskard server\n", "\n", "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", "\n", "> giskard server start\n", "\n", "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", "\n", "> giskard server expose --ngrok-token \\\n", "\n", "Read the following [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", "
" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [], "metadata": { "collapsed": false - } + }, + "outputs": [], + "source": [] } ], "metadata": { @@ -527,4 +2855,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/drug_classification_sklearn.ipynb b/python-client/docs/reference/notebooks/drug_classification_sklearn.ipynb index b0dfbcc653..16811b245b 100644 --- a/python-client/docs/reference/notebooks/drug_classification_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/drug_classification_sklearn.ipynb @@ -1307,11 +1307,11 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -1376,4 +1376,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/enron_email_classification_sklearn.ipynb b/python-client/docs/reference/notebooks/enron_email_classification_sklearn.ipynb index f4d3fedb9d..7639aca0b6 100644 --- a/python-client/docs/reference/notebooks/enron_email_classification_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/enron_email_classification_sklearn.ipynb @@ -54,14 +54,14 @@ { "cell_type": "code", "execution_count": null, + "id": "133e4cf63ec27b24", "metadata": { "collapsed": false }, "outputs": [], "source": [ "pip install \"giskard>=2.0.0b\" -U" - ], - "id": "133e4cf63ec27b24" + ] }, { "attachments": {}, @@ -367,6 +367,7 @@ }, { "cell_type": "markdown", + "id": "d554fd54dda4d2d6", "metadata": { "collapsed": false }, @@ -374,8 +375,7 @@ "## Scan your model to find vulnerabilities\n", "\n", "With the Giskard scan feature, you can detect vulnerabilities in your model, including *performance biases*, *unrobustness*, *data leakage*, *stochasticity*, *underconfidence*, *ethical issues*, and *more*. For detailed information about the scan feature, please refer to our scan [documentation](https://docs.giskard.ai/en/latest/guides/scan/index.html)." 
- ], - "id": "d554fd54dda4d2d6" + ] }, { "cell_type": "code", @@ -400,7 +400,3698 @@ "outputs": [ { "data": { - "text/html": "\n" + "text/html": [ + "\n", + "" + ] }, "metadata": {}, "output_type": "display_data" @@ -511,10 +4202,10 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "client = giskard.GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -525,6 +4216,10 @@ }, { "cell_type": "markdown", + "id": "7f594b5a762b09", + "metadata": { + "collapsed": false + }, "source": [ "
\n", "Connecting Google Colab with the Giskard server\n", "\n", "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", "\n", "> giskard server start\n", "\n", "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", "\n", "> giskard server expose --ngrok-token \\\n", "\n", "Read the following [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", "
" - ], - "metadata": { - "collapsed": false - }, - "id": "7f594b5a762b09" + ] } ], "metadata": { @@ -567,4 +4258,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/example_notebook.ipynb b/python-client/docs/reference/notebooks/example_notebook.ipynb index 7d07511dc7..25648054ee 100644 --- a/python-client/docs/reference/notebooks/example_notebook.ipynb +++ b/python-client/docs/reference/notebooks/example_notebook.ipynb @@ -2716,10 +2716,10 @@ "\n", "#Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -2730,6 +2730,9 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "
\n", "Connecting Google Colab with the Giskard server\n", "\n", "If you are using Google Colab and you want to install the Giskard server **locally**, you can run the Giskard server by executing this line in the terminal of your **local** machine (see the [documentation](https://docs.giskard.ai/en/latest/guides/installation_app/index.html)):\n", "\n", "> giskard server start\n", "\n", "Once the Giskard server is running, from the same terminal on your **local** machine, you can run:\n", "\n", "> giskard server expose --ngrok-token \\\n", "\n", "Read the following [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", "
" - ], - "metadata": { - "collapsed": false - } + ] } ], "metadata": { @@ -2771,4 +2771,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/fake_real_news_classification.ipynb b/python-client/docs/reference/notebooks/fake_real_news_classification.ipynb index 3bca503014..df6e6d464a 100644 --- a/python-client/docs/reference/notebooks/fake_real_news_classification.ipynb +++ b/python-client/docs/reference/notebooks/fake_real_news_classification.ipynb @@ -2045,11 +2045,11 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -2101,4 +2101,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/hotel_text_regression.ipynb b/python-client/docs/reference/notebooks/hotel_text_regression.ipynb index ccd0b81ad9..8baf7f9f65 100644 --- a/python-client/docs/reference/notebooks/hotel_text_regression.ipynb +++ b/python-client/docs/reference/notebooks/hotel_text_regression.ipynb @@ -1887,11 +1887,11 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -1943,4 +1943,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb b/python-client/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb index 2daa46cc2a..42e163f8b4 100644 --- a/python-client/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb +++ b/python-client/docs/reference/notebooks/ieee_fraud_detection_adversarial_validation.ipynb @@ -2740,11 +2740,11 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -2796,4 +2796,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git 
a/python-client/docs/reference/notebooks/insurance_prediction_lgbm.ipynb b/python-client/docs/reference/notebooks/insurance_prediction_lgbm.ipynb index c4a6c76759..1f4d6b4bf5 100644 --- a/python-client/docs/reference/notebooks/insurance_prediction_lgbm.ipynb +++ b/python-client/docs/reference/notebooks/insurance_prediction_lgbm.ipynb @@ -1861,11 +1861,11 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -1943,4 +1943,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/m5_sales_prediction_lgbm.ipynb b/python-client/docs/reference/notebooks/m5_sales_prediction_lgbm.ipynb index f72074a6b8..a3d0fc1cab 100644 --- a/python-client/docs/reference/notebooks/m5_sales_prediction_lgbm.ipynb +++ b/python-client/docs/reference/notebooks/m5_sales_prediction_lgbm.ipynb @@ -1527,11 +1527,11 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", diff --git a/python-client/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb b/python-client/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb index f4c4862a40..0aa6d645b1 100644 --- a/python-client/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/medical_transcript_classification_sklearn.ipynb @@ -4282,11 +4282,11 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -4351,4 +4351,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb b/python-client/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb index d544ec53d5..214de5d9af 100644 --- 
a/python-client/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb +++ b/python-client/docs/reference/notebooks/movie_review_sentiment_classification_pytorch_sklearn.ipynb @@ -1578,11 +1578,11 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -2401,4 +2401,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/newspaper_classification_pytorch.ipynb b/python-client/docs/reference/notebooks/newspaper_classification_pytorch.ipynb index e313d1f4d1..3ddebfec78 100644 --- a/python-client/docs/reference/notebooks/newspaper_classification_pytorch.ipynb +++ b/python-client/docs/reference/notebooks/newspaper_classification_pytorch.ipynb @@ -2,6 +2,9 @@ "cells": [ { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "# Newspaper classification [PyTorch]\n", "\n", @@ -16,53 +19,50 @@ " * Compare models to decide which one to promote\n", " * Debug your tests to diagnose issues\n", " * Share your results and collect business feedback from your team" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "## Install Giskard" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Install Giskard" + ] }, { "cell_type": "code", "execution_count": 1, - "outputs": [], - "source": [ - "!pip install \"giskard>=2.0.0b\" -U" - ], "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-08-22T12:41:11.380265Z", "start_time": "2023-08-22T12:41:11.302704Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "!pip install \"giskard>=2.0.0b\" -U" + ] }, { "cell_type": "markdown", - "source": [ - "## Import libraries" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Import libraries" + ] }, { "cell_type": "code", "execution_count": 2, "metadata": { - "id": "eup4gpgVoA10", "ExecuteTime": { "end_time": "2023-08-22T12:41:36.268665Z", "start_time": "2023-08-22T12:41:13.622046Z" - } + }, + "id": "eup4gpgVoA10" }, "outputs": [], "source": [ @@ -85,16 +85,23 @@ }, { "cell_type": "markdown", - "source": [ - "## Define constants" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Define constants" + ] }, { "cell_type": "code", "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T12:41:36.321238Z", + "start_time": "2023-08-22T12:41:36.276379Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "DEVICE = torch.device(\"cpu\")\n", @@ -104,60 +111,60 @@ "FEATURE_COLUMN_NAME = \"text\"\n", "\n", "LOADERS_BATCH_SIZE = 64" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T12:41:36.321238Z", - "start_time": "2023-08-22T12:41:36.276379Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "## Dataset preparation" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Dataset preparation" + ] }, { "cell_type": 
"markdown", - "source": [ - "### Load data" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Load data" + ] }, { "cell_type": "code", "execution_count": 4, - "outputs": [], - "source": [ - "train_data, test_data = AG_NEWS()" - ], "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-08-22T12:41:37.446562Z", "start_time": "2023-08-22T12:41:36.300328Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "train_data, test_data = AG_NEWS()" + ] }, { "cell_type": "markdown", - "source": [ - "### Wrap dataset with Giskard" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Wrap dataset with Giskard" + ] }, { "cell_type": "code", "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T13:59:47.323179Z", + "start_time": "2023-08-22T13:59:45.958410Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "raw_data = pd.DataFrame({TARGET_COLUMN_NAME: TARGET_MAP[label_id - 1], FEATURE_COLUMN_NAME: text} \n", @@ -167,23 +174,16 @@ " name=\"Test Dataset\", # Ground truth variable\n", " target=\"label\", # Optional\n", ")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T13:59:47.323179Z", - "start_time": "2023-08-22T13:59:45.958410Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "### Prepare dataloaders for training and evaluation" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Prepare dataloaders for training and evaluation" + ] }, { "cell_type": "code", @@ -253,16 +253,23 @@ }, { "cell_type": "markdown", - "source": [ - "### Define model" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Define model" + ] }, { "cell_type": "code", "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T14:01:16.715230Z", + "start_time": "2023-08-22T14:01:16.579679Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "class TextClassificationModel(nn.Module):\n", @@ -284,23 +291,16 @@ "\n", "\n", "model = TextClassificationModel(vocab_size=len(vocab), embed_dim=64, num_class=4).to(DEVICE)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T14:01:16.715230Z", - "start_time": "2023-08-22T14:01:16.579679Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "### Train and evaluate model" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Train and evaluate model" + ] }, { "cell_type": "code", @@ -368,16 +368,19 @@ }, { "cell_type": "markdown", - "source": [ - "### Wrap model with Giskard" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Wrap model with Giskard" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "def infer_predictions(_model: torch.nn.Module, _dataloader: DataLoader) -> np.ndarray:\n", @@ -418,39 +421,794 @@ "# Validate wrapped model.\n", "wrapped_test_metric = accuracy_score(wrapped_data.df[TARGET_COLUMN_NAME], wrapped_model.predict(wrapped_data).prediction)\n", "print(f\"Wrapped Test accuracy: {wrapped_test_metric:.3f}\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Scan model with Giskard\n", "With the Giskard scan feature, you can detect vulnerabilities in your model, including performance biases, unrobustness, data leakage, stochasticity, underconfidence, ethical issues, and more. 
For detailed information about the scan feature, please refer to our scan documentation." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "results = scan(wrapped_model, wrapped_data)" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T13:19:45.209851Z", + "start_time": "2023-08-22T13:19:44.778914Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/html": "\n" + "text/html": [ + "\n", + "" + ] }, "metadata": {}, "output_type": "display_data" @@ -458,39 +1216,35 @@ ], "source": [ "display(results)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-08-22T13:19:45.209851Z", - "start_time": "2023-08-22T13:19:44.778914Z" - } - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Generate a test suite from the Scan\n", "The objects produced by the scan can be used as fixtures to generate a test suite that integrate domain-specific issues. To create custom tests, refer to the Test your ML Model page." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "test_suite = results.generate_test_suite(\"My first test suite\")\n", "test_suite.run()" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Customize your suite by loading objects from the Giskard catalog\n", "\n", @@ -501,24 +1255,24 @@ "* Transformation functions such as generators of typos, paraphrase, style tune, etc\n", "\n", "For demo purposes, we will load a simple unit test (test_f1) that checks if the test F1 score is above the given threshold. For more examples of tests and functions, refer to the Giskard catalog." 
- ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "test_suite.add_test(testing.test_f1(model=wrapped_model, dataset=wrapped_data, threshold=0.7)).run()" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Upload your suite to the Giskard server\n", "\n", @@ -528,35 +1282,35 @@ "* Debug your tests to diagnose the issues\n", "* Create more domain-specific tests that are integrating business feedback\n", "* Share your results" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", "\n", "# Upload to the current project ✉️\n", "test_suite.upload(client, \"my_project\")" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "
\n", "Connecting Google Colab with the Giskard server\n", @@ -571,10 +1325,7 @@ "\n", "Read the flowing [instructions](https://docs.giskard.ai/en/latest/cli/ngrok/index.html) in order to get the `ngrok_API_token`. This will provide you with the code snippets that you can copy and paste into your Colab notebook to establish a connection with your locally installed Giskard server\n", "
" - ], - "metadata": { - "collapsed": false - } + ] } ], "metadata": { @@ -602,4 +1353,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb b/python-client/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb index 91c965e1e5..bc17b4759d 100644 --- a/python-client/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb +++ b/python-client/docs/reference/notebooks/tripadvisor_sentiment_classification.ipynb @@ -3146,10 +3146,10 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client aftern having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "client = giskard.GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -3202,4 +3202,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/python-client/docs/reference/notebooks/wage_classification.ipynb b/python-client/docs/reference/notebooks/wage_classification.ipynb index 3708d3325c..151332bb12 100644 --- a/python-client/docs/reference/notebooks/wage_classification.ipynb +++ b/python-client/docs/reference/notebooks/wage_classification.ipynb @@ -2382,10 +2382,10 @@ "source": [ "# Uploading the test suite will automatically save the model, dataset, tests, slicing & transformation functions inside the Giskard UI server\n", "# Create a Giskard client after having install the Giskard server (see documentation)\n", - "token = \"API_TOKEN\" # Find it in Settings in the Giskard server\n", + "key = \"API_KEY\" # Find it in Settings in the Giskard server\n", "client = GiskardClient(\n", " url=\"http://localhost:19000\", # URL of your Giskard instance\n", - " token=token\n", + " key=key\n", ")\n", "\n", "my_project = client.create_project(\"my_project\", \"PROJECT_NAME\", \"DESCRIPTION\")\n", @@ -2437,4 +2437,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From df4fed6fd5d44f2c462fae1f40d5b3a30681d269 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen Date: Tue, 3 Oct 2023 11:31:36 +0700 Subject: [PATCH 8/9] Fixed tests that check token --- python-client/tests/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python-client/tests/utils.py b/python-client/tests/utils.py index fb651afa58..cb1b9df834 100644 --- a/python-client/tests/utils.py +++ b/python-client/tests/utils.py @@ -2,18 +2,19 @@ import logging import os import re -import requests -import requests_mock import tarfile from pathlib import Path +import requests +import requests_mock + import tests.utils from giskard.client.giskard_client import GiskardClient logger = logging.getLogger(__name__) resource_dir: Path = Path.home() / ".giskard" -headers_to_match = {"Authorization": "Bearer API_ACCESS_KEY", "Content-Type": "application/json"} +headers_to_match = {"Authorization": "Bearer SECRET_TOKEN", "Content-Type": "application/json"} def match_model_id(my_model_id): From 5a2422183ad7f9d6d0a8319edc6d4f6f5c567ef0 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen Date: Tue, 3 Oct 2023 11:35:43 +0700 Subject: [PATCH 9/9] Fixed 
tests that check token --- python-client/tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-client/tests/utils.py b/python-client/tests/utils.py index cb1b9df834..398150eb27 100644 --- a/python-client/tests/utils.py +++ b/python-client/tests/utils.py @@ -68,7 +68,7 @@ def __enter__(self): ) url = "http://giskard-host:12345" - key = "API_ACCESS_KEY" + key = "SECRET_TOKEN" return GiskardClient(url, key), self.mocked_requests def __exit__(self, exc_type, exc_val, exc_tb):