diff --git a/P4/.ipynb_checkpoints/Jupyter_Notebook_Setup_P4-checkpoint.ipynb b/P4/.ipynb_checkpoints/Jupyter_Notebook_Setup_P4-checkpoint.ipynb index 7b3b6ff4..2eca3323 100644 --- a/P4/.ipynb_checkpoints/Jupyter_Notebook_Setup_P4-checkpoint.ipynb +++ b/P4/.ipynb_checkpoints/Jupyter_Notebook_Setup_P4-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 256, + "execution_count": 147, "id": "28c7a0ed", "metadata": {}, "outputs": [], @@ -21,12 +21,12 @@ "warnings.filterwarnings(action='ignore', category=UserWarning)\n", "warnings.filterwarnings(action='ignore', category=DataConversionWarning)\n", "# Loading the test dataset\n", - "test_data = pd.read_csv('/Users/zainab/student_data.csv')\n" + "test_data = pd.read_csv('student_data.csv')\n" ] }, { "cell_type": "code", - "execution_count": 221, + "execution_count": 148, "id": "e911c179", "metadata": { "scrolled": true @@ -152,7 +152,7 @@ "4 0 " ] }, - "execution_count": 221, + "execution_count": 148, "metadata": {}, "output_type": "execute_result" } @@ -163,8 +163,8 @@ }, { "cell_type": "code", - "execution_count": 222, - "id": "a5d76762", + "execution_count": 149, + "id": "94b15a3d", "metadata": {}, "outputs": [ { @@ -295,7 +295,7 @@ "max 4.000000 1.000000 " ] }, - "execution_count": 222, + "execution_count": 149, "metadata": {}, "output_type": "execute_result" } @@ -306,8 +306,8 @@ }, { "cell_type": "code", - "execution_count": 223, - "id": "9aa9b85b", + "execution_count": 150, + "id": "d1c29453", "metadata": {}, "outputs": [], "source": [ @@ -317,8 +317,8 @@ }, { "cell_type": "code", - "execution_count": 224, - "id": "4791c7a2", + "execution_count": 151, + "id": "79228d8e", "metadata": {}, "outputs": [], "source": [ @@ -328,8 +328,8 @@ }, { "cell_type": "code", - "execution_count": 225, - "id": "3a64f2c7", + "execution_count": 38, + "id": "412b261b", "metadata": {}, "outputs": [ { @@ -356,8 +356,28 @@ }, { "cell_type": "code", - "execution_count": 226, - "id": "127ba006", + 
"execution_count": 45, + "id": "4f022baf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['Age', 'Major', 'GPA', 'Extra Curricular', 'Num Programming Languages',\n", + " 'Num Past Internships', 'Good Candidate'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(test_data.columns)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ba1c9cb2", "metadata": {}, "outputs": [ { @@ -384,8 +404,51 @@ }, { "cell_type": "code", - "execution_count": 227, - "id": "1863ba26", + "execution_count": 152, + "id": "d27a24a9", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Could not interpret input 'Gender'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [152]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#Plotting the distribution of the test dataset (Gender)\u001b[39;00m\n\u001b[1;32m 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGender\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43msns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcountplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m plt\u001b[38;5;241m.\u001b[39mylabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDistribution\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 5\u001b[0m 
plt\u001b[38;5;241m.\u001b[39mxticks(rotation\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m90\u001b[39m) \n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/_decorators.py:46\u001b[0m, in \u001b[0;36m_deprecate_positional_args..inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 37\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPass the following variable\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m as \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124mkeyword arg\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFrom version 0.12, the only valid positional argument \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m\n\u001b[1;32m 44\u001b[0m )\n\u001b[1;32m 45\u001b[0m kwargs\u001b[38;5;241m.\u001b[39mupdate({k: arg \u001b[38;5;28;01mfor\u001b[39;00m k, arg \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(sig\u001b[38;5;241m.\u001b[39mparameters, args)})\n\u001b[0;32m---> 46\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/categorical.py:3598\u001b[0m, in \u001b[0;36mcountplot\u001b[0;34m(x, y, hue, data, order, hue_order, orient, color, palette, saturation, dodge, ax, **kwargs)\u001b[0m\n\u001b[1;32m 3595\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m x \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 3596\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot pass values for both `x` and `y`\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 3598\u001b[0m plotter \u001b[38;5;241m=\u001b[39m \u001b[43m_CountPlotter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3599\u001b[0m \u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue_order\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3600\u001b[0m \u001b[43m \u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mci\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_boot\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3601\u001b[0m \u001b[43m \u001b[49m\u001b[43morient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpalette\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msaturation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3602\u001b[0m \u001b[43m \u001b[49m\u001b[43merrcolor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrwidth\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcapsize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdodge\u001b[49m\n\u001b[1;32m 3603\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3605\u001b[0m plotter\u001b[38;5;241m.\u001b[39mvalue_label \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcount\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3607\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ax \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/categorical.py:1584\u001b[0m, in \u001b[0;36m_BarPlotter.__init__\u001b[0;34m(self, x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge)\u001b[0m\n\u001b[1;32m 1579\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, x, y, hue, data, order, hue_order,\n\u001b[1;32m 1580\u001b[0m estimator, ci, n_boot, units, seed,\n\u001b[1;32m 1581\u001b[0m orient, color, palette, saturation, errcolor,\n\u001b[1;32m 1582\u001b[0m errwidth, capsize, dodge):\n\u001b[1;32m 1583\u001b[0m \u001b[38;5;124;03m\"\"\"Initialize the plotter.\"\"\"\u001b[39;00m\n\u001b[0;32m-> 1584\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mestablish_variables\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1585\u001b[0m \u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue_order\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1586\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestablish_colors(color, palette, saturation)\n\u001b[1;32m 1587\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestimate_statistic(estimator, ci, n_boot, seed)\n", + "File 
\u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/categorical.py:153\u001b[0m, in \u001b[0;36m_CategoricalPlotter.establish_variables\u001b[0;34m(self, x, y, hue, data, orient, order, hue_order, units)\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(var, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 152\u001b[0m err \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not interpret input \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(var)\n\u001b[0;32m--> 153\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(err)\n\u001b[1;32m 155\u001b[0m \u001b[38;5;66;03m# Figure out the plotting orientation\u001b[39;00m\n\u001b[1;32m 156\u001b[0m orient \u001b[38;5;241m=\u001b[39m infer_orient(\n\u001b[1;32m 157\u001b[0m x, y, orient, require_numeric\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequire_numeric\n\u001b[1;32m 158\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: Could not interpret input 'Gender'" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAEICAYAAABcVE8dAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPzElEQVR4nO3df6zddX3H8efL1hoBB0Qu6lqYnStgTcToFZ2bE2eUli1pzMgCGJnMrCGKMZvJIEuGm2bL2I/MGYtdRypzyewfSqSaKnFZlG2IcrshUBnmrk56LZMW/LX6Ay+898c57B4vt73f3nPuve39PB/Jjfec7+ec+76ftE++/d57jqkqJEkr3zOWewBJ0tIw+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvHYckb0vyr8s9h7QQBl8rQpLLk3wpyZEkj/Q/f0eSLPds0onC4Oukl+Q9wN8AfwE8H3gecA3wS8CaZRztpyRZtdwzqG0GXye1JKcD7wPeUVUfr6rvV89/VNVbqurHSZ6V5C+TPJTkW0m2J3l2//EXJ5lK8p7+vwweTnL1wPM/N8nuJN9L8mXgRbO+/gVJPpfksSQPJvnNgWO3JPlwkj1JjgCvX5pdkeZm8HWy+0XgWcBtx1hzI3Ae8DLgF4C1wA0Dx58PnN6//+3AtiRn9o9tA34EvAD47f4HAElOBT4H/CNwNnAFcFOSlww895XAnwDPAbz2r2Vl8HWyOws4XFXTT92R5M4k30nywySvA34H+N2qeqyqvg/8KXD5wHP8BHhfVf2kqvYA/wuc378E8xvADVV1pKruB/5+4HG/Dvx3VX2kqqar6t+BTwCXDay5rar+raqerKofLcL3L3W2erkHkIb0KHBWktVPRb+qXgOQZIre9fxTgL0DP78NMHg9/dHB/2AAPwBOA8bo/R05MHDsGwOf/xzwqiTfGbhvNfAPA7cHHystK4Ovk90XgR8DW+idXc92GPgh8JKq+uZxPvchYBo4B/jP/n3nDhw/AHyhqt54jOfw7Wh1wvCSjk5qVfUd4I/pXTu/LMlpSZ6R5GXAqcCTwN8Bf53kbIAka5Nc0uG5nwBuBf4oySlJNgK/NbDk08B5Sd6a5Jn9j1cmefFIv0lpRAy+TnpV9efA7wG/DzwCfAv4W+A64M7+/04CdyX5HvBPwPkdn/5aepd3/ge4BfjIwNf9PvAmej8PONhfcyO9HyJLJ5z4f4AiSW3wDF+SGjFv8JPs7L8g5f6jHE+SDyaZTHJvkpePfkxJ0rC6nOHfAmw6xvHNwIb+x1bgw8OPJUkatXmDX1V3AI8dY8kW4KP9l7PfBZyR5AWjGlCSNBqj+D38tfz0i0um+vc9PHthkq30/hXAqaee+ooLLrhgBF9ektqxd+/ew1U1tpDHjiL4c7397Jy/+lNVO4AdAOPj4zUxMTGCLy9J7UjyjflXzW0Uv6UzRe+ViE9ZR+93kiVJJ5BRBH83cFX/t3VeDXy3qp52OUeStLzmvaST5GPAxfTeoGoKeC/wTICq2g7sAS6l90rGHwBXz/1MkqTlNG/wq+qKeY4X8M6RTSRJWhS+0laSGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGtEp+Ek2JXkwyWSS6+c4fnqSTyX5SpJ9Sa4e/aiSpGHMG/wkq4BtwGZgI3BFko2zlr0T+GpVXQhcDPxVkjUjnlWSNIQuZ/gXAZNVtb+qHgd2AVtmrSngOUkCnAY8BkyPdFJJ0lC6BH8tcGDg9lT/vkEfAl4MHATuA95dVU/OfqIkW5NMJJk4dOjQAkeWJC1El+Bnjvtq1u1LgHuAnwVeBnwoyc887UF
VO6pqvKrGx8bGjnNUSdIwugR/Cjhn4PY6emfyg64Gbq2eSeDrwAWjGVGSNApdgn83sCHJ+v4PYi8Hds9a8xDwBoAkzwPOB/aPclBJ0nBWz7egqqaTXAvcDqwCdlbVviTX9I9vB94P3JLkPnqXgK6rqsOLOLck6TjNG3yAqtoD7Jl13/aBzw8CbxrtaJKkUfKVtpLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY0w+JLUCIMvSY3oFPwkm5I8mGQyyfVHWXNxknuS7EvyhdGOKUka1ur5FiRZBWwD3ghMAXcn2V1VXx1YcwZwE7Cpqh5KcvYizStJWqAuZ/gXAZNVtb+qHgd2AVtmrbkSuLWqHgKoqkdGO6YkaVhdgr8WODBwe6p/36DzgDOTfD7J3iRXzfVESbYmmUgycejQoYVNLElakC7Bzxz31azbq4FXAL8GXAL8YZLznvagqh1VNV5V42NjY8c9rCRp4ea9hk/vjP6cgdvrgINzrDlcVUeAI0nuAC4EvjaSKSVJQ+tyhn83sCHJ+iRrgMuB3bPW3Aa8NsnqJKcArwIeGO2okqRhzHuGX1XTSa4FbgdWATural+Sa/rHt1fVA0k+C9wLPAncXFX3L+bgkqTjk6rZl+OXxvj4eE1MTCzL15akk1WSvVU1vpDH+kpbSWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWqEwZekRhh8SWpEp+An2ZTkwSSTSa4/xrpXJnkiyWWjG1GSNArzBj/JKmAbsBnYCFyRZONR1t0I3D7qISVJw+tyhn8RMFlV+6vqcWAXsGWOde8CPgE8MsL5JEkj0iX4a4EDA7en+vf9vyRrgTcD24/1REm2JplIMnHo0KHjnVWSNIQuwc8c99Ws2x8ArquqJ471RFW1o6rGq2p8bGys44iSpFFY3WHNFHDOwO11wMFZa8aBXUkAzgIuTTJdVZ8cxZCSpOF1Cf7dwIYk64FvApcDVw4uqKr1T32e5Bbg08Zekk4s8wa/qqaTXEvvt29WATural+Sa/rHj3ndXpJ0Yuhyhk9V7QH2zLpvztBX1duGH0uSNGq+0laSGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRBl+SGmHwJakRnYKfZFOSB5NMJrl+juNvSXJv/+POJBeOflRJ0jDmDX6SVcA2YDOwEbgiycZZy74OvK6qXgq8H9gx6kElScPpcoZ/ETBZVfur6nFgF7BlcEFV3VlV3+7fvAtYN9oxJUnD6hL8tcCBgdtT/fuO5u3AZ+Y6kGRrkokkE4cOHeo+pSRpaF2CnznuqzkXJq+nF/zr5jpeVTuqaryqxsfGxrpPKUka2uoOa6aAcwZurwMOzl6U5KXAzcDmqnp0NONJkkalyxn+3cCGJOuTrAEuB3YPLkhyLnAr8Naq+trox5QkDWveM/yqmk5yLXA7sArYWVX7klzTP74duAF4LnBTEoDpqhpfvLElSccrVXNejl904+PjNTExsSxfW5JOVkn2LvSE2lfaSlIjDL4kNcLgS1IjDL4kNcL
gS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjDL4kNcLgS1IjOgU/yaYkDyaZTHL9HMeT5IP94/cmefnoR5UkDWPe4CdZBWwDNgMbgSuSbJy1bDOwof+xFfjwiOeUJA2pyxn+RcBkVe2vqseBXcCWWWu2AB+tnruAM5K8YMSzSpKGsLrDmrXAgYHbU8CrOqxZCzw8uCjJVnr/AgD4cZL7j2valess4PByD3GCcC9muBcz3IsZ5y/0gV2CnznuqwWsoap2ADsAkkxU1XiHr7/iuRcz3IsZ7sUM92JGkomFPrbLJZ0p4JyB2+uAgwtYI0laRl2CfzewIcn6JGuAy4Hds9bsBq7q/7bOq4HvVtXDs59IkrR85r2kU1XTSa4FbgdWATural+Sa/rHtwN7gEuBSeAHwNUdvvaOBU+98rgXM9yLGe7FDPdixoL3IlVPu9QuSVqBfKWtJDXC4EtSIxY9+L4tw4wOe/GW/h7cm+TOJBcux5xLYb69GFj3yiRPJLlsKedbSl32IsnFSe5Jsi/JF5Z6xqXS4e/I6Uk+leQr/b3o8vPCk06SnUkeOdprlRbczapatA96P+T9L+DngTXAV4CNs9ZcCnyG3u/yvxr40mLOtFwfHffiNcCZ/c83t7wXA+v+md4vBVy23HMv45+LM4CvAuf2b5+93HMv4178AXBj//Mx4DFgzXLPvgh78SvAy4H7j3J8Qd1c7DN835Zhxrx7UVV3VtW3+zfvovd6hpWoy58LgHcBnwAeWcrhlliXvbgSuLWqHgKoqpW6H132ooDnJAlwGr3gTy/tmIuvqu6g970dzYK6udjBP9pbLhzvmpXgeL/Pt9P7L/hKNO9eJFkLvBnYvoRzLYcufy7OA85M8vkke5NctWTTLa0ue/Eh4MX0Xth5H/DuqnpyacY7oSyom13eWmEYI3tbhhWg8/eZ5PX0gv/LizrR8umyFx8ArquqJ3oncytWl71YDbwCeAPwbOCLSe6qqq8t9nBLrMteXALcA/wq8CLgc0n+paq+t8iznWgW1M3FDr5vyzCj0/eZ5KXAzcDmqnp0iWZbal32YhzY1Y/9WcClSaar6pNLMuHS6fp35HBVHQGOJLkDuBBYacHvshdXA39WvQvZk0m+DlwAfHlpRjxhLKibi31Jx7dlmDHvXiQ5F7gVeOsKPHsbNO9eVNX6qnphVb0Q+DjwjhUYe+j2d+Q24LVJVic5hd671T6wxHMuhS578RC9f+mQ5Hn03jly/5JOeWJYUDcX9Qy/Fu9tGU46HffiBuC5wE39M9vpWoHvENhxL5rQZS+q6oEknwXuBZ4Ebq6qFffW4h3/XLwfuCXJffQua1xXVSvubZOTfAy4GDgryRTwXuCZMFw3fWsFSWqEr7SVpEYYfElqhMGXpEYYfElqhMGXpEYYfElqhMGXpEb8H5+5PU0LngkXAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Plotting the distribution of the test dataset (Gender)\n", + "plt.title('Gender')\n", + "sns.countplot(x='Gender', data=test_data)\n", + "plt.ylabel('Distribution')\n", + "plt.xticks(rotation=90) \n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "57624579", "metadata": {}, "outputs": [ { @@ -412,8 +475,8 @@ }, { "cell_type": "code", - "execution_count": 228, - "id": "214dd318", + "execution_count": 15, + "id": "ccf66e8b", "metadata": {}, "outputs": [ { @@ -442,8 +505,8 @@ }, { "cell_type": "code", - "execution_count": 229, - "id": "d3a74385", + "execution_count": 16, + "id": "83b82ec6", "metadata": {}, "outputs": [ { @@ -471,8 +534,8 @@ }, { "cell_type": "code", - "execution_count": 230, - "id": "92d36835", + "execution_count": 17, + "id": "4c65532f", "metadata": {}, "outputs": [ { @@ -500,13 +563,13 @@ }, { "cell_type": "code", - "execution_count": 231, - "id": "8ff8d8a5", + "execution_count": 141, + "id": "85a840d1", "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -520,17 +583,16 @@ "source": [ "#Plotting the distribution of the test dataset (Number of Programming Languages)\n", "plt.title('Number of Programming Languages')\n", - "sns.countplot(x='Num Programming Languages', data=test_data )\n", + "sns.countplot(x='Num Programming Languages', data=test_data)\n", "plt.ylabel('Distribution')\n", "plt.xlabel('Number of Programming Languages')\n", - "plt.tight_layout()\n", - "plt.show()" + "plt.show()\n" ] }, { "cell_type": "code", - "execution_count": 257, - "id": "4b81b623", + "execution_count": 19, + "id": "0e8bc628", "metadata": {}, "outputs": [], "source": [ @@ -542,8 +604,8 @@ }, { "cell_type": "code", - "execution_count": 279, - "id": "e22cb1c5", + "execution_count": 51, + "id": "a7037861", "metadata": {}, "outputs": [ { @@ -553,29 +615,23 @@ "The accuracy score for this model is: 0.832\n", "Confusion matrix\n", "[[221 42]\n", - " [ 42 195]]\n" + " [ 42 195]]\n", + "Confusion matrix in percentages\n", + "[[33.33333333 16.66666667]\n", + " [16.66666667 33.33333333]]\n" ] }, { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ - "
" + "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Confusion matrix in percentages\n", - "[[33.33333333 16.66666667]\n", - " [16.66666667 33.33333333]]\n" - ] } ], "source": [ @@ -588,35 +644,18 @@ "accuracy = accuracy_score(Y_true, prediction)\n", "print(\"The accuracy score for this model is\" + \":\", accuracy)\n", "\n", - "conf_matrix= confusion_matrix(Y_true, prediction)\n", - "print('Confusion matrix')\n", - "print(conf_matrix)\n", "\n", - "classes = [\"1\", \"2\"] # Modify this based on your actual class labels\n", "\n", - "# Calculate the confusion matrix\n", - "confusion_matrix = confusion_matrix(y_test, prediction) # Assuming 'y_test' and 'prediction' are the actual and predicted labels\n", "\n", - "# Plot the confusion matrix\n", - "plt.figure(figsize=(8, 6))\n", - "plt.imshow(confusion_matrix, interpolation='nearest', cmap=plt.get_cmap('Blues'))\n", - "plt.title('Confusion Matrix')\n", - "plt.colorbar()\n", "\n", - "# Add labels and ticks\n", - "tick_marks = np.arange(len(classes))\n", - "plt.xticks(tick_marks, classes, rotation=90)\n", - "plt.yticks(tick_marks, classes)\n", + "conf_matrix= confusion_matrix(Y_true, prediction)\n", + "print('Confusion matrix')\n", + "print(conf_matrix)\n", "\n", - "# Display the values on the plot\n", - "thresh = confusion_matrix.max() / 2.\n", - "for i, j in itertools.product(range(confusion_matrix.shape[0]), range(confusion_matrix.shape[1])):\n", - " plt.text(j, i, format(confusion_matrix[i, j], 'd'), horizontalalignment=\"center\", color=\"white\" if confusion_matrix[i, j] > thresh else \"black\")\n", + "classes = [\"1\", \"2\"] \n", "\n", - "plt.tight_layout()\n", - "plt.ylabel('True label')\n", - "plt.xlabel('Predicted label')\n", - "plt.show()\n", + "# Computing the confusion matrix (Regular numbers)\n", + "confusion_matrix = confusion_matrix(Y_true, prediction) \n", "\n", "\n", "# Compute the confusion matrix 
percentage\n", @@ -627,36 +666,858 @@ "Y_true = np.array([0, 1, 1, 0, 1, 0])\n", "predictions = np.array([0, 1, 0, 0, 1, 1])\n", "conf_matrix = confusion_matrix(Y_true, predictions)\n", - "conf_matrix_percent = (conf_matrix / conf_matrix.sum()) * 100\n", + "conf_matrix_percentage = (conf_matrix / conf_matrix.sum()) * 100\n", "\n", "\n", "print(\"Confusion matrix in percentages\")\n", - "print(conf_matrix_percent)\n", + "print(conf_matrix_percentage)\n", + "\n", + "plt.figure(figsize=(6, 4))\n", + "sns.heatmap(conf_matrix_percentage, annot=True, fmt=\"0.2f\", cmap=\"Blues\", cbar=False, square=True)\n", + "plt.xlabel(\"Predicted\")\n", + "plt.ylabel(\"True\")\n", + "plt.title(\"Confusion Matrix in Percentages\")\n", + "plt.show()\n", + "\n", + "\n", " " ] }, { "cell_type": "code", - "execution_count": 234, - "id": "9afef01c", + "execution_count": 56, + "id": "8f9869f6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The accuracy score for this model is: 0.832\n", + "Confusion matrix\n", + "[[221 42]\n", + " [ 42 195]]\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Performance Evaluation Section\n", + "\n", + "from sklearn.metrics import accuracy_score, confusion_matrix\n", + "Y_true= test_data['Good Candidate']\n", + "accuracy = accuracy_score(Y_true, prediction)\n", + "print(\"The accuracy score for this model is\" + \":\", accuracy)\n", + "\n", + "conf_matrix= confusion_matrix(Y_true, prediction)\n", + "print('Confusion matrix')\n", + "print(conf_matrix)\n", + "sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')\n", + "plt.xlabel('Predicted')\n", + "plt.ylabel('True')\n", + "plt.title('Confusion Matrix')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "id": "091bbc00", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "done with Performance Evaluation Section\n" + ] + } + ], + "source": [ + "print (\"done with Performance Evaluation Section\")" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "59a603ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting fairness evaluation seciton\n" + ] + } + ], + "source": [ + "print (\"Starting fairness evaluation seciton\")" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "684fd7a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.84 0.84 0.84 263\n", + " 1 0.82 0.82 0.82 237\n", + "\n", + " accuracy 0.83 500\n", + " macro avg 0.83 0.83 0.83 500\n", + "weighted avg 0.83 0.83 0.83 500\n", + "\n" + ] + } + ], + "source": [ + "# Classification Report\n", + "from sklearn.metrics import classification_report\n", + "report = classification_report(Y_true, prediction)\n", + "print(report)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "283e22ea", + 
"metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import roc_curve, auc, precision_recall_curve\n", + "#Receiver Operating Characteristic curve plotting \n", + "fpr, tpr, _ = roc_curve(Y_true, prediction)\n", + "roc_auc = auc(fpr, tpr)\n", + "\n", + "plt.figure()\n", + "plt.plot(fpr, tpr, color='darkblue', lw=2, label=f'Receiver Operating Characteristic curve (area = {roc_auc:.2f})')\n", + "plt.plot([0, 1], [0, 1], color='green', lw=2, linestyle='--')\n", + "plt.xlabel('False Positive')\n", + "plt.ylabel('True Positive')\n", + "plt.title('Receiver Operating Characteristic')\n", + "plt.legend(loc=\"lower right\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "6d44244d", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Precision-Recall Curve\n", + "precision, recall, _ = precision_recall_curve(Y_true, prediction)\n", + "\n", + "plt.figure()\n", + "plt.plot(recall, precision, color='darkblue', lw=2, label='Precision Recall curve')\n", + "plt.xlabel('Recall')\n", + "plt.ylabel('Precision')\n", + "plt.title('Precision-Recall Curve')\n", + "plt.legend(loc=\"lower left\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8270b981", + "metadata": {}, + "outputs": [], + "source": [ + "#Fairness Evaluation Section\n" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "cf13e617", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Gender'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:3621\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3620\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3622\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/_libs/index.pyx:136\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File 
\u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/_libs/index.pyx:163\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5198\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:5206\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'Gender'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [76]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m evaluation_using_gender[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m prediction\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Ensure 'Gender' column is present in the DataFrame\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m evaluation_using_gender[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGender\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mtest_data\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# Calculate true positive by 'Gender'\u001b[39;00m\n\u001b[1;32m 13\u001b[0m tpr_gender \u001b[38;5;241m=\u001b[39m evaluation_using_gender[evaluation_using_gender[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGood Candidate\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m==\u001b[39m 
\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39mgroupby(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGender\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmean()\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py:3505\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3503\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 3504\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 3505\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m 3507\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:3623\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[1;32m 3622\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m-> 3623\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3624\u001b[0m 
\u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3625\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3626\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3627\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3628\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n", + "\u001b[0;31mKeyError\u001b[0m: 'Gender'" + ] + } + ], + "source": [ + "# True positives for gender\n", + "x_test = test_data.drop(columns=['Good Candidate'], axis=1)\n", + "\n", + "prediction = model.predict(x_test)\n", + "\n", + "evaluation_using_gender = test_data.copy()\n", + "evaluation_using_gender['Prediction'] = prediction\n", + "\n", + "# Ensure 'Gender' column is present in the DataFrame\n", + "evaluation_using_gender['Gender'] = test_data['Gender']\n", + "\n", + "# Calculate true positive by 'Gender'\n", + "tpr_gender = evaluation_using_gender[evaluation_using_gender['Good Candidate'] == 1].groupby('Gender')['Prediction'].mean()\n", + "\n", + "print(\"TP classification by Gender:\")\n", + "print(tpr_gender)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "c244b83c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TP classification by Major:\n", + "Major\n", + "Business 0.875000\n", + "Computer Science 0.837838\n", + "Electrical and Computer Engineering 0.775000\n", + "Information Systems 0.756098\n", + "Math 0.844444\n", + "Statistics and Machine Learning 0.852941\n", + "Name: Prediction, dtype: float64\n" + ] + } + ], + "source": [ + "# True positives for majors\n", + "x_test = test_data.drop(columns=['Good Candidate'], axis=1)\n", + "prediction = model.predict(x_test)\n", + "\n", + "# Add the 'Prediction' column to evaluation_using_major\n", + 
"evaluation_using_major['Prediction'] = prediction\n", + "\n", + "# Filterring rows where 'Good Candidate' is 1 \n", + "tpr_major = evaluation_using_major[evaluation_using_major['Good Candidate'] == 1].groupby('Major')['Prediction'].mean()\n", + "\n", + "print(\"TP classification by Major:\")\n", + "print(tpr_major)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "63372c1b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TP classification by Age\n", + "Age\n", + "18 0.818182\n", + "19 0.869565\n", + "20 0.720000\n", + "21 0.892857\n", + "22 0.891304\n", + "23 0.875000\n", + "24 0.666667\n", + "25 0.250000\n", + "Name: Prediction, dtype: float64\n" + ] + } + ], + "source": [ + "# True positives for AGE\n", + "\n", + "evaluation_using_age = test_data.copy()\n", + "\n", + "x_test = test_data.drop(columns=['Good Candidate'], axis=1)\n", + "prediction = model.predict(x_test) \n", + "\n", + "# Add the 'Prediction' column to evaluation_using_age\n", + "evaluation_using_age['Prediction'] = prediction\n", + "\n", + "# Filter rows where 'Good Candidate' is 1 \n", + "tpr_age = evaluation_using_age[evaluation_using_age['Good Candidate'] == 1].groupby('Age')['Prediction'].mean()\n", + "\n", + "print(\"TP classification by Age\")\n", + "print(tpr_age)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "ce84922a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Precision by Major:\n", + "Major\n", + "Business 0.897436\n", + "Computer Science 0.775000\n", + "Electrical and Computer Engineering 0.861111\n", + "Information Systems 0.794872\n", + "Math 0.844444\n", + "Statistics and Machine Learning 0.763158\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "from sklearn.metrics import precision_score\n", + "\n", + "x_test = test_data.drop(columns=['Good Candidate'], axis=1)\n", + "prediction = model.predict(x_test) \n", + "\n", + "# 
Add the 'Prediction' column to evaluation_using_major\n", + "evaluation_using_major['Prediction'] = prediction\n", + "\n", + "# Calculate precision by 'Major'\n", + "precision_major = evaluation_using_major.groupby('Major').apply(\n", + " lambda x: precision_score(x['Good Candidate'], x['Prediction'], zero_division=0)\n", + ")\n", + "\n", + "print(\"Precision by Major:\")\n", + "print(precision_major)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "a2cb18bb", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Gender'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [81]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m evaluation_using_major[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m prediction\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Calculate precision by 'Major'\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m precision_major \u001b[38;5;241m=\u001b[39m \u001b[43mevaluation_using_major\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m x: precision_score(x[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGood Candidate\u001b[39m\u001b[38;5;124m'\u001b[39m], x[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m], zero_division\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision by 
Gender:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28mprint\u001b[39m(precision_gender)\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py:7718\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[0;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[1;32m 7713\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_axis_number(axis)\n\u001b[1;32m 7715\u001b[0m \u001b[38;5;66;03m# https://github.com/python/mypy/issues/7642\u001b[39;00m\n\u001b[1;32m 7716\u001b[0m \u001b[38;5;66;03m# error: Argument \"squeeze\" to \"DataFrameGroupBy\" has incompatible type\u001b[39;00m\n\u001b[1;32m 7717\u001b[0m \u001b[38;5;66;03m# \"Union[bool, NoDefault]\"; expected \"bool\"\u001b[39;00m\n\u001b[0;32m-> 7718\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDataFrameGroupBy\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 7719\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7720\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7721\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7722\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7723\u001b[0m \u001b[43m \u001b[49m\u001b[43mas_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mas_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7724\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7725\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mgroup_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7726\u001b[0m \u001b[43m \u001b[49m\u001b[43msqueeze\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msqueeze\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[arg-type]\u001b[39;49;00m\n\u001b[1;32m 7727\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7728\u001b[0m \u001b[43m \u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7729\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:882\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[0;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[1;32m 879\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m grouper \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgroupby\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgrouper\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_grouper\n\u001b[0;32m--> 882\u001b[0m grouper, exclusions, obj \u001b[38;5;241m=\u001b[39m \u001b[43mget_grouper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 883\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 884\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 885\u001b[0m \u001b[43m 
\u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 886\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 887\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 888\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 889\u001b[0m \u001b[43m \u001b[49m\u001b[43mmutated\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmutated\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 890\u001b[0m \u001b[43m \u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 891\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 893\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj \u001b[38;5;241m=\u001b[39m obj\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39m_get_axis_number(axis)\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/grouper.py:882\u001b[0m, in \u001b[0;36mget_grouper\u001b[0;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[1;32m 880\u001b[0m in_axis, level, gpr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, gpr, \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 882\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(gpr)\n\u001b[1;32m 883\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m 
\u001b[38;5;28misinstance\u001b[39m(gpr, Grouper) \u001b[38;5;129;01mand\u001b[39;00m gpr\u001b[38;5;241m.\u001b[39mkey \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 884\u001b[0m \u001b[38;5;66;03m# Add key to exclusions\u001b[39;00m\n\u001b[1;32m 885\u001b[0m exclusions\u001b[38;5;241m.\u001b[39madd(gpr\u001b[38;5;241m.\u001b[39mkey)\n", + "\u001b[0;31mKeyError\u001b[0m: 'Gender'" + ] + } + ], + "source": [ + "\n", + "from sklearn.metrics import precision_score\n", + "\n", + "x_test = test_data.drop(columns=['Good Candidate'], axis=1)\n", + "prediction = model.predict(x_test) \n", + "\n", + "# Add the 'Prediction' column to evaluation_using_major\n", + "evaluation_using_major['Prediction'] = prediction\n", + "\n", + "# Calculate precision by 'Gender' (raises KeyError while the dataset has no 'Gender' column)\n", + "precision_gender = evaluation_using_major.groupby('Gender').apply(\n", + " lambda x: precision_score(x['Good Candidate'], x['Prediction'], zero_division=0)\n", + ")\n", + "\n", + "print(\"Precision by Gender:\")\n", + "print(precision_gender)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "221162a5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Precision for Age:\n", + "Age\n", + "18 0.900000\n", + "19 0.909091\n", + "20 0.782609\n", + "21 0.833333\n", + "22 0.732143\n", + "23 0.875000\n", + "24 1.000000\n", + "25 1.000000\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "from sklearn.metrics import precision_score\n", + "\n", + "x_test = test_data.drop(columns=['Good Candidate'], axis=1)\n", + "prediction = model.predict(x_test) \n", + "\n", + "# Add the 'Prediction' column to evaluation_using_age\n", + "evaluation_using_age['Prediction'] = prediction\n", + "\n", + "# Calculate precision by 'Age'\n", + "precision_age = evaluation_using_age.groupby('Age').apply(\n", + " lambda x: precision_score(x['Good Candidate'], x['Prediction'], zero_division=0)\n", + 
")\n", + "\n", + "print(\"Precision for Age:\")\n", + "print(precision_age)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "c7292c69", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Precision for Major:\n", + "Major\n", + "Business 0.897436\n", + "Computer Science 0.775000\n", + "Electrical and Computer Engineering 0.861111\n", + "Information Systems 0.794872\n", + "Math 0.844444\n", + "Statistics and Machine Learning 0.763158\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "from sklearn.metrics import precision_score\n", + "\n", + "x_test = test_data.drop(columns=['Good Candidate'], axis=1)\n", + "\n", + "prediction = model.predict(x_test) \n", + "\n", + "# Add the 'Prediction' column to evaluation_using_major (the frame grouped below)\n", + "evaluation_using_major['Prediction'] = prediction\n", + "precision_major = evaluation_using_major.groupby('Major').apply(\n", + " lambda x: precision_score(x['Good Candidate'], x['Prediction'], zero_division=0)\n", + ")\n", + "\n", + "print(\"Precision for Major:\")\n", + "print(precision_major)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f18be1b", + "metadata": {}, + "outputs": [], + "source": [ + "#First Fairness evaluation tool: Group Unaware" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "fe7fafdb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The model is Group Aware for Age attribute.\n" + ] + } + ], + "source": [ + "is_age_used = 'Age' in x_test.columns\n", + "group_unaware_status_age = \"Group Unaware\" if not is_age_used else \"Group Aware\"\n", + "print(\"The model is \" + group_unaware_status_age + \" for Age attribute.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "e515a4f4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "done\n" + "The model is Group Aware for Major attribute.\n" + ] + 
} + ], + "source": [ + "is_major_used = 'Major' in x_test.columns\n", + "group_unaware_status_major = \"Group Unaware\" if not is_major_used else \"Group Aware\"\n", + "print(\"The model is \" + group_unaware_status_major + \" for Major attribute.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "791586bc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The model is Group Unaware for Gender attribute.\n" + ] + } + ], + "source": [ + "is_gender_used = 'Gender' in x_test.columns\n", + "group_unaware_status = \"Group Unaware\" if not is_gender_used else \"Group Aware\"\n", + "# Report the Gender status computed above, not the one left over from the Major cell\n", + "print(\"The model is \" + group_unaware_status + \" for Gender attribute.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "id": "76e42835", + "metadata": {}, + "outputs": [], + "source": [ + "#2nd : Demographic Parity \n", + "\n", + "def demographic_parity(data, feature, outcome_column='Good Candidate'):\n", + "    \"\"\"Print each group's mean outcome relative to the average group mean.\n", + "\n", + "    data: DataFrame containing `feature` and `outcome_column`.\n", + "    feature: name of the column whose groups are compared.\n", + "    outcome_column: name of the column averaged per group. The default is the\n", + "        ground-truth label; pass the prediction column to compare the model's\n", + "        selection rates instead.\n", + "\n", + "    The per-group mean is divided by the unweighted mean of the group means,\n", + "    so a value of 1.0 means the group sits exactly at the average.\n", + "    Prints the result and returns None. Raises KeyError if a column is missing.\n", + "    \"\"\"\n", + "    selection_rate = data.groupby(feature)[outcome_column].mean()\n", + "\n", + "    # Check demographic parity (local name chosen so it does not shadow the function)\n", + "    parity_ratio = selection_rate / selection_rate.mean()\n", + "\n", + "    print(f\"Demographic Parity for {feature}:\")\n", + "    print(parity_ratio)" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "id": "97a221c5", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Gender'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [136]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdemographic_parity\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", 
+ "Input \u001b[0;32mIn [135]\u001b[0m, in \u001b[0;36mdemographic_parity\u001b[0;34m(data, feature)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdemographic_parity\u001b[39m(data, feature):\n\u001b[0;32m----> 5\u001b[0m selection_feature \u001b[38;5;241m=\u001b[39m \u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfeature\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGood Candidate\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmean()\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Check demographic parity\u001b[39;00m\n\u001b[1;32m 8\u001b[0m demographic_parity \u001b[38;5;241m=\u001b[39m selection_feature \u001b[38;5;241m/\u001b[39m selection_feature\u001b[38;5;241m.\u001b[39mmean()\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py:7718\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[0;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[1;32m 7713\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_axis_number(axis)\n\u001b[1;32m 7715\u001b[0m \u001b[38;5;66;03m# https://github.com/python/mypy/issues/7642\u001b[39;00m\n\u001b[1;32m 7716\u001b[0m \u001b[38;5;66;03m# error: Argument \"squeeze\" to \"DataFrameGroupBy\" has incompatible type\u001b[39;00m\n\u001b[1;32m 7717\u001b[0m \u001b[38;5;66;03m# \"Union[bool, NoDefault]\"; expected \"bool\"\u001b[39;00m\n\u001b[0;32m-> 7718\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDataFrameGroupBy\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 7719\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7720\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7721\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7722\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7723\u001b[0m \u001b[43m \u001b[49m\u001b[43mas_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mas_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7724\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7725\u001b[0m \u001b[43m \u001b[49m\u001b[43mgroup_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7726\u001b[0m \u001b[43m \u001b[49m\u001b[43msqueeze\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msqueeze\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[arg-type]\u001b[39;49;00m\n\u001b[1;32m 7727\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7728\u001b[0m \u001b[43m \u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7729\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:882\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[0;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[1;32m 879\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m grouper \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 880\u001b[0m 
\u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgroupby\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgrouper\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_grouper\n\u001b[0;32m--> 882\u001b[0m grouper, exclusions, obj \u001b[38;5;241m=\u001b[39m \u001b[43mget_grouper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 883\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 884\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 885\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 886\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 887\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 888\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 889\u001b[0m \u001b[43m \u001b[49m\u001b[43mmutated\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmutated\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 890\u001b[0m \u001b[43m \u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 891\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 893\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj \u001b[38;5;241m=\u001b[39m obj\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis 
\u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39m_get_axis_number(axis)\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/grouper.py:882\u001b[0m, in \u001b[0;36mget_grouper\u001b[0;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[1;32m 880\u001b[0m in_axis, level, gpr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, gpr, \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 882\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(gpr)\n\u001b[1;32m 883\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(gpr, Grouper) \u001b[38;5;129;01mand\u001b[39;00m gpr\u001b[38;5;241m.\u001b[39mkey \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 884\u001b[0m \u001b[38;5;66;03m# Add key to exclusions\u001b[39;00m\n\u001b[1;32m 885\u001b[0m exclusions\u001b[38;5;241m.\u001b[39madd(gpr\u001b[38;5;241m.\u001b[39mkey)\n", + "\u001b[0;31mKeyError\u001b[0m: 'Gender'" + ] + } + ], + "source": [ + "demographic_parity(test_data, 'Gender')" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "dd45687e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Demographic Parity for Age:\n", + "Age\n", + "18 0.770865\n", + "19 0.703334\n", + "20 0.757607\n", + "21 0.672755\n", + "22 0.781483\n", + "23 1.055301\n", + "24 1.576769\n", + "25 1.681887\n", + "Name: Good Candidate, dtype: float64\n" + ] + } + ], + "source": [ + "demographic_parity(test_data, 'Age')" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "id": "5537c6e1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Demographic Parity for Major:\n", + "Major\n", + "Business 1.039182\n", + "Computer Science 
0.865119\n", + "Electrical and Computer Engineering 1.002068\n", + "Information Systems 1.106129\n", + "Math 1.007398\n", + "Statistics and Machine Learning 0.980105\n", + "Name: Good Candidate, dtype: float64\n" + ] + } + ], + "source": [ + "demographic_parity(test_data, 'Major')" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "0ad9f500", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Good candidates for men (predicted): 0.6\n", + "Good candidates for women, predicted: 0.6\n" + ] + } + ], + "source": [ + "# #Demographic Parity\n", + "# import numpy as np\n", + "\n", + "# # Example: Define predicted and actual values for men\n", + "# predicted_men = [1, 0, 1, 1, 0]\n", + "# actual_men = [1, 1, 0, 1, 0]\n", + "\n", + "# # Function to calculate positive rate\n", + "# def calculate_positive_rate(predictions, actuals):\n", + "# numpy_predictions = np.array(predictions)\n", + "# positive_count = np.sum(numpy_predictions == 1)\n", + "# total_count = len(predictions)\n", + "# return positive_count / total_count\n", + "\n", + "# # Calculate and print positive rates for men\n", + "# prediction_men = calculate_positive_rate(predicted_men, actual_men)\n", + "# print(f\"Good candidates for men (predicted): {prediction_men}\")\n", + "\n", + "# # Example: Define predicted and actual values for women\n", + "# predicted_women = [0, 1, 0, 1, 1]\n", + "# actual_women = [1, 0, 1, 0, 1]\n", + "\n", + "# # Calculate and print positive rates for women\n", + "# prediction_women = calculate_positive_rate(predicted_women, actual_women)\n", + "# print(f\"Good candidates for women, predicted: {prediction_women}\")\n", + "# x\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6007c896", + "metadata": {}, + "outputs": [], + "source": [ + "#equality odds" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "id": "67ca6bd5", + "metadata": {}, + "outputs": [], + "source": [ + "def 
equality_odds(data, target_variable, sensitive_feature, predicted_prob_column):\n", + "    \"\"\"Print per-group true-positive and false-positive rates.\n", + "\n", + "    data: DataFrame holding the label and the sensitive feature.\n", + "    target_variable: name of the ground-truth label column (compared to 1 and 0).\n", + "    sensitive_feature: name of the column whose groups are compared.\n", + "    predicted_prob_column: name of the column holding the model predictions\n", + "        (assumed to be 0/1 class predictions so the group mean is a rate).\n", + "        If the column is missing, the notebook-level `prediction` array is used.\n", + "\n", + "    Prints both tables and returns None. `data` itself is left unmodified.\n", + "    \"\"\"\n", + "    # Work on a copy: adding the column to the caller's frame (e.g. test_data)\n", + "    # would make later `test_data.drop(columns=['Good Candidate'])` carry it into x_test.\n", + "    scored = data.copy()\n", + "    if predicted_prob_column not in scored.columns:\n", + "        scored[predicted_prob_column] = prediction\n", + "\n", + "    # Calculate true positive rates\n", + "    tpr_by_group = scored[scored[target_variable] == 1].groupby(sensitive_feature)[predicted_prob_column].mean()\n", + "\n", + "    # Print true positive rates\n", + "    print(f\"\\nTrue Positive Rates by {sensitive_feature}:\")\n", + "    print(tpr_by_group)\n", + "\n", + "    # Calculate false positive rates\n", + "    fpr_by_group = scored[scored[target_variable] == 0].groupby(sensitive_feature)[predicted_prob_column].mean()\n", + "\n", + "    # Print false positive rates\n", + "    print(f\"\\nFalse Positive Rates by {sensitive_feature}:\")\n", + "    print(fpr_by_group)" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "id": "342a5098", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "True Positive Rates by Age:\n", + "Age\n", + "18 0.818182\n", + "19 0.869565\n", + "20 0.720000\n", + "21 0.892857\n", + "22 0.891304\n", + "23 0.875000\n", + "24 0.666667\n", + "25 0.250000\n", + "Name: Prediction, dtype: float64\n", + "\n", + "False Positive Rates by Age:\n", + "Age\n", + "18 0.076923\n", + "19 0.062500\n", + "20 0.163934\n", + "21 0.119048\n", + "22 0.283019\n", + "23 0.210526\n", + "24 0.000000\n", + "Name: Prediction, dtype: float64\n" + ] + } + ], + "source": [ + "equality_odds(test_data, 'Good Candidate', 'Age', 'Prediction')" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "9f0a7b95", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "True Positive Rates by Major:\n", + "Major\n", + "Business 0.875000\n", + "Computer Science 0.837838\n", + "Electrical and Computer Engineering 0.775000\n", + "Information Systems 0.756098\n", + "Math 0.844444\n", + "Statistics and Machine Learning 0.852941\n", + "Name: Prediction, dtype: float64\n", + "\n", + "False Positive Rates by Major:\n", + 
"Major\n", + "Business 0.097561\n", + "Computer Science 0.169811\n", + "Electrical and Computer Engineering 0.113636\n", + "Information Systems 0.216216\n", + "Math 0.142857\n", + "Statistics and Machine Learning 0.230769\n", + "Name: Prediction, dtype: float64\n" + ] + } + ], + "source": [ + "equality_odds(test_data, 'Good Candidate', 'Major', 'Prediction')" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "a2d5335d", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Gender'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [127]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mequality_odds\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGood Candidate\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mPrediction\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "Input \u001b[0;32mIn [124]\u001b[0m, in \u001b[0;36mequality_odds\u001b[0;34m(data, target_variable, sensitive_feature, predicted_prob_column)\u001b[0m\n\u001b[1;32m 3\u001b[0m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m prediction\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# Calculate true positive rates\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m tpr_by_group \u001b[38;5;241m=\u001b[39m 
\u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[43mtarget_variable\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m(\u001b[49m\u001b[43msensitive_feature\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmean()\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# Print true positive rates\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mTrue Positive Rates by \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msensitive_feature\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py:7718\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[0;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[1;32m 7713\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_axis_number(axis)\n\u001b[1;32m 7715\u001b[0m \u001b[38;5;66;03m# https://github.com/python/mypy/issues/7642\u001b[39;00m\n\u001b[1;32m 7716\u001b[0m \u001b[38;5;66;03m# error: Argument \"squeeze\" to \"DataFrameGroupBy\" has incompatible type\u001b[39;00m\n\u001b[1;32m 7717\u001b[0m \u001b[38;5;66;03m# \"Union[bool, NoDefault]\"; expected \"bool\"\u001b[39;00m\n\u001b[0;32m-> 7718\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDataFrameGroupBy\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 7719\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7720\u001b[0m 
\u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7721\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7722\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7723\u001b[0m \u001b[43m \u001b[49m\u001b[43mas_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mas_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7724\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7725\u001b[0m \u001b[43m \u001b[49m\u001b[43mgroup_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7726\u001b[0m \u001b[43m \u001b[49m\u001b[43msqueeze\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msqueeze\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[arg-type]\u001b[39;49;00m\n\u001b[1;32m 7727\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7728\u001b[0m \u001b[43m \u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7729\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:882\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[0;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[1;32m 879\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m grouper \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 880\u001b[0m 
\u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgroupby\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgrouper\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_grouper\n\u001b[0;32m--> 882\u001b[0m grouper, exclusions, obj \u001b[38;5;241m=\u001b[39m \u001b[43mget_grouper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 883\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 884\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 885\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 886\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 887\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 888\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 889\u001b[0m \u001b[43m \u001b[49m\u001b[43mmutated\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmutated\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 890\u001b[0m \u001b[43m \u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 891\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 893\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj \u001b[38;5;241m=\u001b[39m obj\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis 
\u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39m_get_axis_number(axis)\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/grouper.py:882\u001b[0m, in \u001b[0;36mget_grouper\u001b[0;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[1;32m 880\u001b[0m in_axis, level, gpr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, gpr, \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 882\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(gpr)\n\u001b[1;32m 883\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(gpr, Grouper) \u001b[38;5;129;01mand\u001b[39;00m gpr\u001b[38;5;241m.\u001b[39mkey \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 884\u001b[0m \u001b[38;5;66;03m# Add key to exclusions\u001b[39;00m\n\u001b[1;32m 885\u001b[0m exclusions\u001b[38;5;241m.\u001b[39madd(gpr\u001b[38;5;241m.\u001b[39mkey)\n", + "\u001b[0;31mKeyError\u001b[0m: 'Gender'" ] } ], "source": [ - "print(\"done\") " + "equality_odds(test_data, 'Good Candidate', 'Gender', 'Prediction')" ] }, { "cell_type": "code", "execution_count": null, - "id": "f9ada5bf", + "id": "d0fe341c", "metadata": {}, "outputs": [], "source": []