diff --git a/lab-customer-analysis-round-1-checkpoint.ipynb b/lab-customer-analysis-round-1-checkpoint.ipynb new file mode 100644 index 0000000..0fa7fff --- /dev/null +++ b/lab-customer-analysis-round-1-checkpoint.ipynb @@ -0,0 +1,2543 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 101, + "id": "c43915d2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
4003NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4004NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4005NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4006NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4007NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

4008 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "4003 NaN NaN NaN NaN \n", + "4004 NaN NaN NaN NaN \n", + "4005 NaN NaN NaN NaN \n", + "4006 NaN NaN NaN NaN \n", + "4007 NaN NaN NaN NaN \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Total Claim Amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... \n", + "4003 NaN \n", + "4004 NaN \n", + "4005 NaN \n", + "4006 NaN \n", + "4007 NaN \n", + "\n", + "[4008 rows x 11 columns]" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "tab1_data = pd.read_csv(r'C:\\Users\\david\\OneDrive\\Ambiente de Trabalho\\Iron Hack\\ficheiros para LAB PANDA 1\\file1.csv')\n", + "\n", + "tab1_data\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "297404d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsTotal Claim AmountPolicy TypeVehicle Class
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.600000Personal AutoFour-Door Car
1CW49887CaliforniaFMaster462680.11%794871141/0/00547.200000Special AutoSUV
2MY31220CaliforniaFCollege899704.02%542301121/0/00537.600000Personal AutoTwo-Door Car
3UH35128OregonFCollege2580706.30%712102141/1/001027.200000Personal AutoLuxury Car
4WH52799ArizonaFCollege380812.21%94903941/0/00451.200000Corporate AutoTwo-Door Car
....................................
991HV85198ArizonaMMaster847141.75%63513701/0/00185.667213Personal AutoFour-Door Car
992BS91566ArizonaFCollege543121.91%58161681/0/00140.747286Corporate AutoFour-Door Car
993IL40123NevadaFCollege568964.41%83640701/0/00471.050488Corporate AutoTwo-Door Car
994MY32149CaliforniaFMaster368672.38%0961/0/0028.460568Personal AutoTwo-Door Car
995SA91515CaliforniaMBachelor399258.39%01111/0/00700.349052Personal AutoSUV
\n", + "

996 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + ".. ... ... ... ... ... ... \n", + "991 HV85198 Arizona M Master 847141.75% 63513 \n", + "992 BS91566 Arizona F College 543121.91% 58161 \n", + "993 IL40123 Nevada F College 568964.41% 83640 \n", + "994 MY32149 California F Master 368672.38% 0 \n", + "995 SA91515 California M Bachelor 399258.39% 0 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n", + "0 88 1/0/00 633.600000 \n", + "1 114 1/0/00 547.200000 \n", + "2 112 1/0/00 537.600000 \n", + "3 214 1/1/00 1027.200000 \n", + "4 94 1/0/00 451.200000 \n", + ".. ... ... ... \n", + "991 70 1/0/00 185.667213 \n", + "992 68 1/0/00 140.747286 \n", + "993 70 1/0/00 471.050488 \n", + "994 96 1/0/00 28.460568 \n", + "995 111 1/0/00 700.349052 \n", + "\n", + " Policy Type Vehicle Class \n", + "0 Personal Auto Four-Door Car \n", + "1 Special Auto SUV \n", + "2 Personal Auto Two-Door Car \n", + "3 Personal Auto Luxury Car \n", + "4 Corporate Auto Two-Door Car \n", + ".. ... ... \n", + "991 Personal Auto Four-Door Car \n", + "992 Corporate Auto Four-Door Car \n", + "993 Corporate Auto Two-Door Car \n", + "994 Personal Auto Two-Door Car \n", + "995 Personal Auto SUV \n", + "\n", + "[996 rows x 11 columns]" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tab2_data = pd.read_csv(r'C:\\Users\\david\\OneDrive\\Ambiente de Trabalho\\Iron Hack\\ficheiros para LAB PANDA 1\\file2.csv')\n", + "\n", + "tab2_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "5698462b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerStateCustomer Lifetime ValueEducationGenderIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeTotal Claim AmountVehicle Class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
....................................
7065LA72316California23405.987980BachelorM71941730Personal Auto198.234764Four-Door Car
7066PK87824California3096.511217CollegeF21604790Corporate Auto379.200000Four-Door Car
7067TD14365California8163.890428BachelorM0853Corporate Auto790.784983Four-Door Car
7068UP19263California7524.442436CollegeM21941960Personal Auto691.200000Four-Door Car
7069Y167826California2611.836866CollegeM0770Corporate Auto369.600000Two-Door Car
\n", + "

7070 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer State Customer Lifetime Value Education \\\n", + "0 SA25987 Washington 3479.137523 High School or Below \n", + "1 TB86706 Arizona 2502.637401 Master \n", + "2 ZL73902 Nevada 3265.156348 Bachelor \n", + "3 KX23516 California 4455.843406 High School or Below \n", + "4 FN77294 California 7704.958480 High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 California 23405.987980 Bachelor \n", + "7066 PK87824 California 3096.511217 College \n", + "7067 TD14365 California 8163.890428 Bachelor \n", + "7068 UP19263 California 7524.442436 College \n", + "7069 Y167826 California 2611.836866 College \n", + "\n", + " Gender Income Monthly Premium Auto Number of Open Complaints \\\n", + "0 M 0 104 0 \n", + "1 M 0 66 0 \n", + "2 F 25820 82 0 \n", + "3 F 0 121 0 \n", + "4 M 30366 101 2 \n", + "... ... ... ... ... \n", + "7065 M 71941 73 0 \n", + "7066 F 21604 79 0 \n", + "7067 M 0 85 3 \n", + "7068 M 21941 96 0 \n", + "7069 M 0 77 0 \n", + "\n", + " Policy Type Total Claim Amount Vehicle Class \n", + "0 Personal Auto 499.200000 Two-Door Car \n", + "1 Personal Auto 3.468912 Two-Door Car \n", + "2 Personal Auto 393.600000 Four-Door Car \n", + "3 Personal Auto 699.615192 SUV \n", + "4 Personal Auto 484.800000 SUV \n", + "... ... ... ... \n", + "7065 Personal Auto 198.234764 Four-Door Car \n", + "7066 Corporate Auto 379.200000 Four-Door Car \n", + "7067 Corporate Auto 790.784983 Four-Door Car \n", + "7068 Personal Auto 691.200000 Four-Door Car \n", + "7069 Corporate Auto 369.600000 Two-Door Car \n", + "\n", + "[7070 rows x 11 columns]" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tab3_data = pd.read_csv(r'C:\\Users\\david\\OneDrive\\Ambiente de Trabalho\\Iron Hack\\ficheiros para LAB PANDA 1\\file3.csv')\n", + "\n", + "tab3_data\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "10a878b6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7070, 11)" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " # SHOW THE DATAFRAME'S SHAPE\n", + "\n", + "tab1_data.shape\n", + "tab2_data.shape\n", + "tab3_data.shape\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "7eaccb15", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
4003NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4004NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4005NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4006NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4007NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

4008 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "4003 NaN NaN NaN NaN \n", + "4004 NaN NaN NaN NaN \n", + "4005 NaN NaN NaN NaN \n", + "4006 NaN NaN NaN NaN \n", + "4007 NaN NaN NaN NaN \n", + "\n", + " customer_lifetime_value income monthly_premium_auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " number_of_open_complaints policy_type vehicle_class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " total_claim_amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... \n", + "4003 NaN \n", + "4004 NaN \n", + "4005 NaN \n", + "4006 NaN \n", + "4007 NaN \n", + "\n", + "[4008 rows x 11 columns]" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " # STANDARDIZE HEADER NAMES TABELA 1\n", + " \n", + "cols = []\n", + "for i in range(len(tab1_data.columns)): \n", + " cols.append(tab1_data.columns[i].lower().replace(' ', '_')) \n", + "tab1_data.columns = cols\n", + "tab1_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "bb444eec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintstotal_claim_amountpolicy_typevehicle_class
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.600000Personal AutoFour-Door Car
1CW49887CaliforniaFMaster462680.11%794871141/0/00547.200000Special AutoSUV
2MY31220CaliforniaFCollege899704.02%542301121/0/00537.600000Personal AutoTwo-Door Car
3UH35128OregonFCollege2580706.30%712102141/1/001027.200000Personal AutoLuxury Car
4WH52799ArizonaFCollege380812.21%94903941/0/00451.200000Corporate AutoTwo-Door Car
....................................
991HV85198ArizonaMMaster847141.75%63513701/0/00185.667213Personal AutoFour-Door Car
992BS91566ArizonaFCollege543121.91%58161681/0/00140.747286Corporate AutoFour-Door Car
993IL40123NevadaFCollege568964.41%83640701/0/00471.050488Corporate AutoTwo-Door Car
994MY32149CaliforniaFMaster368672.38%0961/0/0028.460568Personal AutoTwo-Door Car
995SA91515CaliforniaMBachelor399258.39%01111/0/00700.349052Personal AutoSUV
\n", + "

996 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education customer_lifetime_value income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + ".. ... ... ... ... ... ... \n", + "991 HV85198 Arizona M Master 847141.75% 63513 \n", + "992 BS91566 Arizona F College 543121.91% 58161 \n", + "993 IL40123 Nevada F College 568964.41% 83640 \n", + "994 MY32149 California F Master 368672.38% 0 \n", + "995 SA91515 California M Bachelor 399258.39% 0 \n", + "\n", + " monthly_premium_auto number_of_open_complaints total_claim_amount \\\n", + "0 88 1/0/00 633.600000 \n", + "1 114 1/0/00 547.200000 \n", + "2 112 1/0/00 537.600000 \n", + "3 214 1/1/00 1027.200000 \n", + "4 94 1/0/00 451.200000 \n", + ".. ... ... ... \n", + "991 70 1/0/00 185.667213 \n", + "992 68 1/0/00 140.747286 \n", + "993 70 1/0/00 471.050488 \n", + "994 96 1/0/00 28.460568 \n", + "995 111 1/0/00 700.349052 \n", + "\n", + " policy_type vehicle_class \n", + "0 Personal Auto Four-Door Car \n", + "1 Special Auto SUV \n", + "2 Personal Auto Two-Door Car \n", + "3 Personal Auto Luxury Car \n", + "4 Corporate Auto Two-Door Car \n", + ".. ... ... \n", + "991 Personal Auto Four-Door Car \n", + "992 Corporate Auto Four-Door Car \n", + "993 Corporate Auto Two-Door Car \n", + "994 Personal Auto Two-Door Car \n", + "995 Personal Auto SUV \n", + "\n", + "[996 rows x 11 columns]" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# STANDARDIZE HEADER NAMES TABELA 2\n", + " \n", + "cols = []\n", + "for i in range(len(tab2_data.columns)): \n", + " cols.append(tab2_data.columns[i].lower().replace(' ', '_')) \n", + "tab2_data.columns = cols\n", + "tab2_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "0fcf53af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueeducationgenderincomemonthly_premium_autonumber_of_open_complaintspolicy_typetotal_claim_amountvehicle_class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
....................................
7065LA72316California23405.987980BachelorM71941730Personal Auto198.234764Four-Door Car
7066PK87824California3096.511217CollegeF21604790Corporate Auto379.200000Four-Door Car
7067TD14365California8163.890428BachelorM0853Corporate Auto790.784983Four-Door Car
7068UP19263California7524.442436CollegeM21941960Personal Auto691.200000Four-Door Car
7069Y167826California2611.836866CollegeM0770Corporate Auto369.600000Two-Door Car
\n", + "

7070 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value education \\\n", + "0 SA25987 Washington 3479.137523 High School or Below \n", + "1 TB86706 Arizona 2502.637401 Master \n", + "2 ZL73902 Nevada 3265.156348 Bachelor \n", + "3 KX23516 California 4455.843406 High School or Below \n", + "4 FN77294 California 7704.958480 High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 California 23405.987980 Bachelor \n", + "7066 PK87824 California 3096.511217 College \n", + "7067 TD14365 California 8163.890428 Bachelor \n", + "7068 UP19263 California 7524.442436 College \n", + "7069 Y167826 California 2611.836866 College \n", + "\n", + " gender income monthly_premium_auto number_of_open_complaints \\\n", + "0 M 0 104 0 \n", + "1 M 0 66 0 \n", + "2 F 25820 82 0 \n", + "3 F 0 121 0 \n", + "4 M 30366 101 2 \n", + "... ... ... ... ... \n", + "7065 M 71941 73 0 \n", + "7066 F 21604 79 0 \n", + "7067 M 0 85 3 \n", + "7068 M 21941 96 0 \n", + "7069 M 0 77 0 \n", + "\n", + " policy_type total_claim_amount vehicle_class \n", + "0 Personal Auto 499.200000 Two-Door Car \n", + "1 Personal Auto 3.468912 Two-Door Car \n", + "2 Personal Auto 393.600000 Four-Door Car \n", + "3 Personal Auto 699.615192 SUV \n", + "4 Personal Auto 484.800000 SUV \n", + "... ... ... ... \n", + "7065 Personal Auto 198.234764 Four-Door Car \n", + "7066 Corporate Auto 379.200000 Four-Door Car \n", + "7067 Corporate Auto 790.784983 Four-Door Car \n", + "7068 Personal Auto 691.200000 Four-Door Car \n", + "7069 Corporate Auto 369.600000 Two-Door Car \n", + "\n", + "[7070 rows x 11 columns]" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# STANDARDIZE HEADER NAMES TABELA 3\n", + " \n", + "cols = []\n", + "for i in range(len(tab3_data.columns)): \n", + " cols.append(tab3_data.columns[i].lower().replace(' ', '_')) \n", + "tab3_data.columns = cols\n", + "tab3_data\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "552a0b18", + "metadata": {}, + "outputs": [], + "source": [ + "# REARRANGE THE COLUMNS IN THE DATAFRAME AS NEEDED\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "a57f4cef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amountstate
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934NaN
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935NaN
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247NaN
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344NaN
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323NaN
.......................................
7065LA72316NaNMBachelor23405.9879871941.073.00Personal AutoFour-Door Car198.234764California
7066PK87824NaNFCollege3096.51121721604.079.00Corporate AutoFour-Door Car379.200000California
7067TD14365NaNMBachelor8163.8904280.085.03Corporate AutoFour-Door Car790.784983California
7068UP19263NaNMCollege7524.44243621941.096.00Personal AutoFour-Door Car691.200000California
7069Y167826NaNMCollege2611.8368660.077.00Corporate AutoTwo-Door Car369.600000California
\n", + "

12074 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 NaN M Bachelor \n", + "7066 PK87824 NaN F College \n", + "7067 TD14365 NaN M Bachelor \n", + "7068 UP19263 NaN M College \n", + "7069 Y167826 NaN M College \n", + "\n", + " customer_lifetime_value income monthly_premium_auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "7065 23405.98798 71941.0 73.0 \n", + "7066 3096.511217 21604.0 79.0 \n", + "7067 8163.890428 0.0 85.0 \n", + "7068 7524.442436 21941.0 96.0 \n", + "7069 2611.836866 0.0 77.0 \n", + "\n", + " number_of_open_complaints policy_type vehicle_class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "7065 0 Personal Auto Four-Door Car \n", + "7066 0 Corporate Auto Four-Door Car \n", + "7067 3 Corporate Auto Four-Door Car \n", + "7068 0 Personal Auto Four-Door Car \n", + "7069 0 Corporate Auto Two-Door Car \n", + "\n", + " total_claim_amount state \n", + "0 2.704934 NaN \n", + "1 1131.464935 NaN \n", + "2 566.472247 NaN \n", + "3 529.881344 NaN \n", + "4 17.269323 NaN \n", + "... ... ... \n", + "7065 198.234764 California \n", + "7066 379.200000 California \n", + "7067 790.784983 California \n", + "7068 691.200000 California \n", + "7069 369.600000 California \n", + "\n", + "[12074 rows x 12 columns]" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# CONCATENATE THE THREE DATAFRAMES\n", + "\n", + "datatotal_data = pd.concat([tab1_data,tab2_data,tab3_data], axis = 0) \n", + "\n", + "datatotal_data\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "a2367b94", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer object\n", + "st object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value object\n", + "income float64\n", + "monthly_premium_auto float64\n", + "number_of_open_complaints object\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount float64\n", + "state object\n", + "dtype: object" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# QUESTION: WICH COLUMNS ARE NUMERICAL?\n", + "# ANSWER: Income, monthly_premium_auto, total_claim_amount\n", + "\n", + "# QUESTION: WICH COLUMNS ARE CATEGORICAL?\n", + "# ANSWER: customer, st, gender, education, customer_lifetime_value, number_of_open_complaints, policy_type, vehicle_class,state\n", + "\n", + "# UNDERSTAND THE MEANING OF ALL COLUMNS\n", + "# ANSWER:\n", + "\n", + "datatotal_data.dtypes\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "32de16d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amountstate
0RB50392WashingtonNaNMasterNaN0.01000.0Personal AutoFour-Door Car2.704934NaN
1QZ44356ArizonaFBachelor697953.59%0.094.0Personal AutoFour-Door Car1131.464935NaN
2AI49188NevadaFBachelor1288743.17%48767.0108.0Personal AutoTwo-Door Car566.472247NaN
3WW63253CaliforniaMBachelor764586.18%0.0106.0Corporate AutoSUV529.881344NaN
4GA49547WashingtonMHigh School or Below536307.65%36357.068.0Personal AutoFour-Door Car17.269323NaN
....................................
7065LA72316NaNMBachelor23405.9879871941.073.0Personal AutoFour-Door Car198.234764California
7066PK87824NaNFCollege3096.51121721604.079.0Corporate AutoFour-Door Car379.200000California
7067TD14365NaNMBachelor8163.8904280.085.0Corporate AutoFour-Door Car790.784983California
7068UP19263NaNMCollege7524.44243621941.096.0Personal AutoFour-Door Car691.200000California
7069Y167826NaNMCollege2611.8368660.077.0Corporate AutoTwo-Door Car369.600000California
\n", + "

12074 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 NaN M Bachelor \n", + "7066 PK87824 NaN F College \n", + "7067 TD14365 NaN M Bachelor \n", + "7068 UP19263 NaN M College \n", + "7069 Y167826 NaN M College \n", + "\n", + " customer_lifetime_value income monthly_premium_auto policy_type \\\n", + "0 NaN 0.0 1000.0 Personal Auto \n", + "1 697953.59% 0.0 94.0 Personal Auto \n", + "2 1288743.17% 48767.0 108.0 Personal Auto \n", + "3 764586.18% 0.0 106.0 Corporate Auto \n", + "4 536307.65% 36357.0 68.0 Personal Auto \n", + "... ... ... ... ... \n", + "7065 23405.98798 71941.0 73.0 Personal Auto \n", + "7066 3096.511217 21604.0 79.0 Corporate Auto \n", + "7067 8163.890428 0.0 85.0 Corporate Auto \n", + "7068 7524.442436 21941.0 96.0 Personal Auto \n", + "7069 2611.836866 0.0 77.0 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount state \n", + "0 Four-Door Car 2.704934 NaN \n", + "1 Four-Door Car 1131.464935 NaN \n", + "2 Two-Door Car 566.472247 NaN \n", + "3 SUV 529.881344 NaN \n", + "4 Four-Door Car 17.269323 NaN \n", + "... ... ... ... \n", + "7065 Four-Door Car 198.234764 California \n", + "7066 Four-Door Car 379.200000 California \n", + "7067 Four-Door Car 790.784983 California \n", + "7068 Four-Door Car 691.200000 California \n", + "7069 Two-Door Car 369.600000 California \n", + "\n", + "[12074 rows x 11 columns]" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " # DELETE THE COLUMN EDUCATION AND THE NUMBER OF OPEN COMPLAINTS FROM THE DATAFRAME\n", + " \n", + "totaldata_data = datatotal_data.drop(['education'], axis = 1)\n", + "totaldata_data\n", + "\n", + "totaldata_data = datatotal_data.drop(['number_of_open_complaints'], axis = 1)\n", + "totaldata_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "854a8c46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amountstate
0RB50392WashingtonNaNMasterNaN0.01000.0Personal AutoFour-Door Car2.704934NaN
1QZ44356ArizonaFBachelor697953.59%0.094.0Personal AutoFour-Door Car1131.464935NaN
2AI49188NevadaFBachelor1288743.17%48767.0108.0Personal AutoTwo-Door Car566.472247NaN
3WW63253CaliforniaMBachelor764586.18%0.0106.0Corporate AutoSUV529.881344NaN
4GA49547WashingtonMHigh School or Below536307.65%36357.068.0Personal AutoFour-Door Car17.269323NaN
....................................
7065LA72316NaNMBachelor23405.9879871941.073.0Personal AutoFour-Door Car198.234764California
7066PK87824NaNFCollege3096.51121721604.079.0Corporate AutoFour-Door Car379.200000California
7067TD14365NaNMBachelor8163.8904280.085.0Corporate AutoFour-Door Car790.784983California
7068UP19263NaNMCollege7524.44243621941.096.0Personal AutoFour-Door Car691.200000California
7069Y167826NaNMCollege2611.8368660.077.0Corporate AutoTwo-Door Car369.600000California
\n", + "

9135 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 NaN M Bachelor \n", + "7066 PK87824 NaN F College \n", + "7067 TD14365 NaN M Bachelor \n", + "7068 UP19263 NaN M College \n", + "7069 Y167826 NaN M College \n", + "\n", + " customer_lifetime_value income monthly_premium_auto policy_type \\\n", + "0 NaN 0.0 1000.0 Personal Auto \n", + "1 697953.59% 0.0 94.0 Personal Auto \n", + "2 1288743.17% 48767.0 108.0 Personal Auto \n", + "3 764586.18% 0.0 106.0 Corporate Auto \n", + "4 536307.65% 36357.0 68.0 Personal Auto \n", + "... ... ... ... ... \n", + "7065 23405.98798 71941.0 73.0 Personal Auto \n", + "7066 3096.511217 21604.0 79.0 Corporate Auto \n", + "7067 8163.890428 0.0 85.0 Corporate Auto \n", + "7068 7524.442436 21941.0 96.0 Personal Auto \n", + "7069 2611.836866 0.0 77.0 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount state \n", + "0 Four-Door Car 2.704934 NaN \n", + "1 Four-Door Car 1131.464935 NaN \n", + "2 Two-Door Car 566.472247 NaN \n", + "3 SUV 529.881344 NaN \n", + "4 Four-Door Car 17.269323 NaN \n", + "... ... ... ... \n", + "7065 Four-Door Car 198.234764 California \n", + "7066 Four-Door Car 379.200000 California \n", + "7067 Four-Door Car 790.784983 California \n", + "7068 Four-Door Car 691.200000 California \n", + "7069 Two-Door Car 369.600000 California \n", + "\n", + "[9135 rows x 11 columns]" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# CHECK FOR DUPLICATE ROWS IN THE DATA AND REMOVE IF ANY\n", + "\n", + "totaldata_data.duplicated() \n", + "\n", + "totaldata_data[totaldata_data.duplicated()] \n", + "\n", + "totaldata_data = totaldata_data.drop_duplicates()\n", + "\n", + "totaldata_data\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "90edf4ee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amountstate
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [customer, st, gender, education, customer_lifetime_value, income, monthly_premium_auto, number_of_open_complaints, policy_type, vehicle_class, total_claim_amount, state]\n", + "Index: []" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " # Filter out the data for customers who have an income of 0 or less.\n", + " \n", + "datatotal_data[datatotal_data['income'] < 0]\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lab-customer-analysis-round-1.ipynb b/lab-customer-analysis-round-1.ipynb new file mode 100644 index 0000000..0fa7fff --- /dev/null +++ b/lab-customer-analysis-round-1.ipynb @@ -0,0 +1,2543 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 101, + "id": "c43915d2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
4003NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4004NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4005NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4006NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4007NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

4008 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "4003 NaN NaN NaN NaN \n", + "4004 NaN NaN NaN NaN \n", + "4005 NaN NaN NaN NaN \n", + "4006 NaN NaN NaN NaN \n", + "4007 NaN NaN NaN NaN \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Total Claim Amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... \n", + "4003 NaN \n", + "4004 NaN \n", + "4005 NaN \n", + "4006 NaN \n", + "4007 NaN \n", + "\n", + "[4008 rows x 11 columns]" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "tab1_data = pd.read_csv(r'C:\\Users\\david\\OneDrive\\Ambiente de Trabalho\\Iron Hack\\ficheiros para LAB PANDA 1\\file1.csv')\n", + "\n", + "tab1_data\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "297404d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsTotal Claim AmountPolicy TypeVehicle Class
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.600000Personal AutoFour-Door Car
1CW49887CaliforniaFMaster462680.11%794871141/0/00547.200000Special AutoSUV
2MY31220CaliforniaFCollege899704.02%542301121/0/00537.600000Personal AutoTwo-Door Car
3UH35128OregonFCollege2580706.30%712102141/1/001027.200000Personal AutoLuxury Car
4WH52799ArizonaFCollege380812.21%94903941/0/00451.200000Corporate AutoTwo-Door Car
....................................
991HV85198ArizonaMMaster847141.75%63513701/0/00185.667213Personal AutoFour-Door Car
992BS91566ArizonaFCollege543121.91%58161681/0/00140.747286Corporate AutoFour-Door Car
993IL40123NevadaFCollege568964.41%83640701/0/00471.050488Corporate AutoTwo-Door Car
994MY32149CaliforniaFMaster368672.38%0961/0/0028.460568Personal AutoTwo-Door Car
995SA91515CaliforniaMBachelor399258.39%01111/0/00700.349052Personal AutoSUV
\n", + "

996 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + ".. ... ... ... ... ... ... \n", + "991 HV85198 Arizona M Master 847141.75% 63513 \n", + "992 BS91566 Arizona F College 543121.91% 58161 \n", + "993 IL40123 Nevada F College 568964.41% 83640 \n", + "994 MY32149 California F Master 368672.38% 0 \n", + "995 SA91515 California M Bachelor 399258.39% 0 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n", + "0 88 1/0/00 633.600000 \n", + "1 114 1/0/00 547.200000 \n", + "2 112 1/0/00 537.600000 \n", + "3 214 1/1/00 1027.200000 \n", + "4 94 1/0/00 451.200000 \n", + ".. ... ... ... \n", + "991 70 1/0/00 185.667213 \n", + "992 68 1/0/00 140.747286 \n", + "993 70 1/0/00 471.050488 \n", + "994 96 1/0/00 28.460568 \n", + "995 111 1/0/00 700.349052 \n", + "\n", + " Policy Type Vehicle Class \n", + "0 Personal Auto Four-Door Car \n", + "1 Special Auto SUV \n", + "2 Personal Auto Two-Door Car \n", + "3 Personal Auto Luxury Car \n", + "4 Corporate Auto Two-Door Car \n", + ".. ... ... \n", + "991 Personal Auto Four-Door Car \n", + "992 Corporate Auto Four-Door Car \n", + "993 Corporate Auto Two-Door Car \n", + "994 Personal Auto Two-Door Car \n", + "995 Personal Auto SUV \n", + "\n", + "[996 rows x 11 columns]" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tab2_data = pd.read_csv(r'C:\\Users\\david\\OneDrive\\Ambiente de Trabalho\\Iron Hack\\ficheiros para LAB PANDA 1\\file2.csv')\n", + "\n", + "tab2_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "5698462b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerStateCustomer Lifetime ValueEducationGenderIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeTotal Claim AmountVehicle Class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
....................................
7065LA72316California23405.987980BachelorM71941730Personal Auto198.234764Four-Door Car
7066PK87824California3096.511217CollegeF21604790Corporate Auto379.200000Four-Door Car
7067TD14365California8163.890428BachelorM0853Corporate Auto790.784983Four-Door Car
7068UP19263California7524.442436CollegeM21941960Personal Auto691.200000Four-Door Car
7069Y167826California2611.836866CollegeM0770Corporate Auto369.600000Two-Door Car
\n", + "

7070 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer State Customer Lifetime Value Education \\\n", + "0 SA25987 Washington 3479.137523 High School or Below \n", + "1 TB86706 Arizona 2502.637401 Master \n", + "2 ZL73902 Nevada 3265.156348 Bachelor \n", + "3 KX23516 California 4455.843406 High School or Below \n", + "4 FN77294 California 7704.958480 High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 California 23405.987980 Bachelor \n", + "7066 PK87824 California 3096.511217 College \n", + "7067 TD14365 California 8163.890428 Bachelor \n", + "7068 UP19263 California 7524.442436 College \n", + "7069 Y167826 California 2611.836866 College \n", + "\n", + " Gender Income Monthly Premium Auto Number of Open Complaints \\\n", + "0 M 0 104 0 \n", + "1 M 0 66 0 \n", + "2 F 25820 82 0 \n", + "3 F 0 121 0 \n", + "4 M 30366 101 2 \n", + "... ... ... ... ... \n", + "7065 M 71941 73 0 \n", + "7066 F 21604 79 0 \n", + "7067 M 0 85 3 \n", + "7068 M 21941 96 0 \n", + "7069 M 0 77 0 \n", + "\n", + " Policy Type Total Claim Amount Vehicle Class \n", + "0 Personal Auto 499.200000 Two-Door Car \n", + "1 Personal Auto 3.468912 Two-Door Car \n", + "2 Personal Auto 393.600000 Four-Door Car \n", + "3 Personal Auto 699.615192 SUV \n", + "4 Personal Auto 484.800000 SUV \n", + "... ... ... ... \n", + "7065 Personal Auto 198.234764 Four-Door Car \n", + "7066 Corporate Auto 379.200000 Four-Door Car \n", + "7067 Corporate Auto 790.784983 Four-Door Car \n", + "7068 Personal Auto 691.200000 Four-Door Car \n", + "7069 Corporate Auto 369.600000 Two-Door Car \n", + "\n", + "[7070 rows x 11 columns]" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tab3_data = pd.read_csv(r'C:\\Users\\david\\OneDrive\\Ambiente de Trabalho\\Iron Hack\\ficheiros para LAB PANDA 1\\file3.csv')\n", + "\n", + "tab3_data\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "10a878b6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7070, 11)" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " # SHOW THE DATAFRAME'S SHAPE\n", + "\n", + "tab1_data.shape\n", + "tab2_data.shape\n", + "tab3_data.shape\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "7eaccb15", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
4003NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4004NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4005NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4006NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4007NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

4008 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "4003 NaN NaN NaN NaN \n", + "4004 NaN NaN NaN NaN \n", + "4005 NaN NaN NaN NaN \n", + "4006 NaN NaN NaN NaN \n", + "4007 NaN NaN NaN NaN \n", + "\n", + " customer_lifetime_value income monthly_premium_auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " number_of_open_complaints policy_type vehicle_class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " total_claim_amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... \n", + "4003 NaN \n", + "4004 NaN \n", + "4005 NaN \n", + "4006 NaN \n", + "4007 NaN \n", + "\n", + "[4008 rows x 11 columns]" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " # STANDARDIZE HEADER NAMES TABELA 1\n", + " \n", + "cols = []\n", + "for i in range(len(tab1_data.columns)): \n", + " cols.append(tab1_data.columns[i].lower().replace(' ', '_')) \n", + "tab1_data.columns = cols\n", + "tab1_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "bb444eec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintstotal_claim_amountpolicy_typevehicle_class
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.600000Personal AutoFour-Door Car
1CW49887CaliforniaFMaster462680.11%794871141/0/00547.200000Special AutoSUV
2MY31220CaliforniaFCollege899704.02%542301121/0/00537.600000Personal AutoTwo-Door Car
3UH35128OregonFCollege2580706.30%712102141/1/001027.200000Personal AutoLuxury Car
4WH52799ArizonaFCollege380812.21%94903941/0/00451.200000Corporate AutoTwo-Door Car
....................................
991HV85198ArizonaMMaster847141.75%63513701/0/00185.667213Personal AutoFour-Door Car
992BS91566ArizonaFCollege543121.91%58161681/0/00140.747286Corporate AutoFour-Door Car
993IL40123NevadaFCollege568964.41%83640701/0/00471.050488Corporate AutoTwo-Door Car
994MY32149CaliforniaFMaster368672.38%0961/0/0028.460568Personal AutoTwo-Door Car
995SA91515CaliforniaMBachelor399258.39%01111/0/00700.349052Personal AutoSUV
\n", + "

996 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education customer_lifetime_value income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + ".. ... ... ... ... ... ... \n", + "991 HV85198 Arizona M Master 847141.75% 63513 \n", + "992 BS91566 Arizona F College 543121.91% 58161 \n", + "993 IL40123 Nevada F College 568964.41% 83640 \n", + "994 MY32149 California F Master 368672.38% 0 \n", + "995 SA91515 California M Bachelor 399258.39% 0 \n", + "\n", + " monthly_premium_auto number_of_open_complaints total_claim_amount \\\n", + "0 88 1/0/00 633.600000 \n", + "1 114 1/0/00 547.200000 \n", + "2 112 1/0/00 537.600000 \n", + "3 214 1/1/00 1027.200000 \n", + "4 94 1/0/00 451.200000 \n", + ".. ... ... ... \n", + "991 70 1/0/00 185.667213 \n", + "992 68 1/0/00 140.747286 \n", + "993 70 1/0/00 471.050488 \n", + "994 96 1/0/00 28.460568 \n", + "995 111 1/0/00 700.349052 \n", + "\n", + " policy_type vehicle_class \n", + "0 Personal Auto Four-Door Car \n", + "1 Special Auto SUV \n", + "2 Personal Auto Two-Door Car \n", + "3 Personal Auto Luxury Car \n", + "4 Corporate Auto Two-Door Car \n", + ".. ... ... \n", + "991 Personal Auto Four-Door Car \n", + "992 Corporate Auto Four-Door Car \n", + "993 Corporate Auto Two-Door Car \n", + "994 Personal Auto Two-Door Car \n", + "995 Personal Auto SUV \n", + "\n", + "[996 rows x 11 columns]" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# STANDARDIZE HEADER NAMES TABELA 2\n", + " \n", + "cols = []\n", + "for i in range(len(tab2_data.columns)): \n", + " cols.append(tab2_data.columns[i].lower().replace(' ', '_')) \n", + "tab2_data.columns = cols\n", + "tab2_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "0fcf53af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueeducationgenderincomemonthly_premium_autonumber_of_open_complaintspolicy_typetotal_claim_amountvehicle_class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
....................................
7065LA72316California23405.987980BachelorM71941730Personal Auto198.234764Four-Door Car
7066PK87824California3096.511217CollegeF21604790Corporate Auto379.200000Four-Door Car
7067TD14365California8163.890428BachelorM0853Corporate Auto790.784983Four-Door Car
7068UP19263California7524.442436CollegeM21941960Personal Auto691.200000Four-Door Car
7069Y167826California2611.836866CollegeM0770Corporate Auto369.600000Two-Door Car
\n", + "

7070 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value education \\\n", + "0 SA25987 Washington 3479.137523 High School or Below \n", + "1 TB86706 Arizona 2502.637401 Master \n", + "2 ZL73902 Nevada 3265.156348 Bachelor \n", + "3 KX23516 California 4455.843406 High School or Below \n", + "4 FN77294 California 7704.958480 High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 California 23405.987980 Bachelor \n", + "7066 PK87824 California 3096.511217 College \n", + "7067 TD14365 California 8163.890428 Bachelor \n", + "7068 UP19263 California 7524.442436 College \n", + "7069 Y167826 California 2611.836866 College \n", + "\n", + " gender income monthly_premium_auto number_of_open_complaints \\\n", + "0 M 0 104 0 \n", + "1 M 0 66 0 \n", + "2 F 25820 82 0 \n", + "3 F 0 121 0 \n", + "4 M 30366 101 2 \n", + "... ... ... ... ... \n", + "7065 M 71941 73 0 \n", + "7066 F 21604 79 0 \n", + "7067 M 0 85 3 \n", + "7068 M 21941 96 0 \n", + "7069 M 0 77 0 \n", + "\n", + " policy_type total_claim_amount vehicle_class \n", + "0 Personal Auto 499.200000 Two-Door Car \n", + "1 Personal Auto 3.468912 Two-Door Car \n", + "2 Personal Auto 393.600000 Four-Door Car \n", + "3 Personal Auto 699.615192 SUV \n", + "4 Personal Auto 484.800000 SUV \n", + "... ... ... ... \n", + "7065 Personal Auto 198.234764 Four-Door Car \n", + "7066 Corporate Auto 379.200000 Four-Door Car \n", + "7067 Corporate Auto 790.784983 Four-Door Car \n", + "7068 Personal Auto 691.200000 Four-Door Car \n", + "7069 Corporate Auto 369.600000 Two-Door Car \n", + "\n", + "[7070 rows x 11 columns]" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# STANDARDIZE HEADER NAMES TABELA 3\n", + " \n", + "cols = []\n", + "for i in range(len(tab3_data.columns)): \n", + " cols.append(tab3_data.columns[i].lower().replace(' ', '_')) \n", + "tab3_data.columns = cols\n", + "tab3_data\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "552a0b18", + "metadata": {}, + "outputs": [], + "source": [ + "# REARRANGE THE COLUMNS IN THE DATAFRAME AS NEEDED\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "a57f4cef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amountstate
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934NaN
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935NaN
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247NaN
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344NaN
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323NaN
.......................................
7065LA72316NaNMBachelor23405.9879871941.073.00Personal AutoFour-Door Car198.234764California
7066PK87824NaNFCollege3096.51121721604.079.00Corporate AutoFour-Door Car379.200000California
7067TD14365NaNMBachelor8163.8904280.085.03Corporate AutoFour-Door Car790.784983California
7068UP19263NaNMCollege7524.44243621941.096.00Personal AutoFour-Door Car691.200000California
7069Y167826NaNMCollege2611.8368660.077.00Corporate AutoTwo-Door Car369.600000California
\n", + "

12074 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 NaN M Bachelor \n", + "7066 PK87824 NaN F College \n", + "7067 TD14365 NaN M Bachelor \n", + "7068 UP19263 NaN M College \n", + "7069 Y167826 NaN M College \n", + "\n", + " customer_lifetime_value income monthly_premium_auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "7065 23405.98798 71941.0 73.0 \n", + "7066 3096.511217 21604.0 79.0 \n", + "7067 8163.890428 0.0 85.0 \n", + "7068 7524.442436 21941.0 96.0 \n", + "7069 2611.836866 0.0 77.0 \n", + "\n", + " number_of_open_complaints policy_type vehicle_class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "7065 0 Personal Auto Four-Door Car \n", + "7066 0 Corporate Auto Four-Door Car \n", + "7067 3 Corporate Auto Four-Door Car \n", + "7068 0 Personal Auto Four-Door Car \n", + "7069 0 Corporate Auto Two-Door Car \n", + "\n", + " total_claim_amount state \n", + "0 2.704934 NaN \n", + "1 1131.464935 NaN \n", + "2 566.472247 NaN \n", + "3 529.881344 NaN \n", + "4 17.269323 NaN \n", + "... ... ... \n", + "7065 198.234764 California \n", + "7066 379.200000 California \n", + "7067 790.784983 California \n", + "7068 691.200000 California \n", + "7069 369.600000 California \n", + "\n", + "[12074 rows x 12 columns]" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# CONCATENATE THE THREE DATAFRAMES\n", + "\n", + "datatotal_data = pd.concat([tab1_data,tab2_data,tab3_data], axis = 0) \n", + "\n", + "datatotal_data\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "a2367b94", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer object\n", + "st object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value object\n", + "income float64\n", + "monthly_premium_auto float64\n", + "number_of_open_complaints object\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount float64\n", + "state object\n", + "dtype: object" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# QUESTION: WICH COLUMNS ARE NUMERICAL?\n", + "# ANSWER: Income, monthly_premium_auto, total_claim_amount\n", + "\n", + "# QUESTION: WICH COLUMNS ARE CATEGORICAL?\n", + "# ANSWER: customer, st, gender, education, customer_lifetime_value, number_of_open_complaints, policy_type, vehicle_class,state\n", + "\n", + "# UNDERSTAND THE MEANING OF ALL COLUMNS\n", + "# ANSWER:\n", + "\n", + "datatotal_data.dtypes\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "32de16d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amountstate
0RB50392WashingtonNaNMasterNaN0.01000.0Personal AutoFour-Door Car2.704934NaN
1QZ44356ArizonaFBachelor697953.59%0.094.0Personal AutoFour-Door Car1131.464935NaN
2AI49188NevadaFBachelor1288743.17%48767.0108.0Personal AutoTwo-Door Car566.472247NaN
3WW63253CaliforniaMBachelor764586.18%0.0106.0Corporate AutoSUV529.881344NaN
4GA49547WashingtonMHigh School or Below536307.65%36357.068.0Personal AutoFour-Door Car17.269323NaN
....................................
7065LA72316NaNMBachelor23405.9879871941.073.0Personal AutoFour-Door Car198.234764California
7066PK87824NaNFCollege3096.51121721604.079.0Corporate AutoFour-Door Car379.200000California
7067TD14365NaNMBachelor8163.8904280.085.0Corporate AutoFour-Door Car790.784983California
7068UP19263NaNMCollege7524.44243621941.096.0Personal AutoFour-Door Car691.200000California
7069Y167826NaNMCollege2611.8368660.077.0Corporate AutoTwo-Door Car369.600000California
\n", + "

12074 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 NaN M Bachelor \n", + "7066 PK87824 NaN F College \n", + "7067 TD14365 NaN M Bachelor \n", + "7068 UP19263 NaN M College \n", + "7069 Y167826 NaN M College \n", + "\n", + " customer_lifetime_value income monthly_premium_auto policy_type \\\n", + "0 NaN 0.0 1000.0 Personal Auto \n", + "1 697953.59% 0.0 94.0 Personal Auto \n", + "2 1288743.17% 48767.0 108.0 Personal Auto \n", + "3 764586.18% 0.0 106.0 Corporate Auto \n", + "4 536307.65% 36357.0 68.0 Personal Auto \n", + "... ... ... ... ... \n", + "7065 23405.98798 71941.0 73.0 Personal Auto \n", + "7066 3096.511217 21604.0 79.0 Corporate Auto \n", + "7067 8163.890428 0.0 85.0 Corporate Auto \n", + "7068 7524.442436 21941.0 96.0 Personal Auto \n", + "7069 2611.836866 0.0 77.0 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount state \n", + "0 Four-Door Car 2.704934 NaN \n", + "1 Four-Door Car 1131.464935 NaN \n", + "2 Two-Door Car 566.472247 NaN \n", + "3 SUV 529.881344 NaN \n", + "4 Four-Door Car 17.269323 NaN \n", + "... ... ... ... \n", + "7065 Four-Door Car 198.234764 California \n", + "7066 Four-Door Car 379.200000 California \n", + "7067 Four-Door Car 790.784983 California \n", + "7068 Four-Door Car 691.200000 California \n", + "7069 Two-Door Car 369.600000 California \n", + "\n", + "[12074 rows x 11 columns]" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " # DELETE THE COLUMN EDUCATION AND THE NUMBER OF OPEN COMPLAINTS FROM THE DATAFRAME\n", + " \n", + "totaldata_data = datatotal_data.drop(['education'], axis = 1)\n", + "totaldata_data\n", + "\n", + "totaldata_data = datatotal_data.drop(['number_of_open_complaints'], axis = 1)\n", + "totaldata_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "854a8c46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amountstate
0RB50392WashingtonNaNMasterNaN0.01000.0Personal AutoFour-Door Car2.704934NaN
1QZ44356ArizonaFBachelor697953.59%0.094.0Personal AutoFour-Door Car1131.464935NaN
2AI49188NevadaFBachelor1288743.17%48767.0108.0Personal AutoTwo-Door Car566.472247NaN
3WW63253CaliforniaMBachelor764586.18%0.0106.0Corporate AutoSUV529.881344NaN
4GA49547WashingtonMHigh School or Below536307.65%36357.068.0Personal AutoFour-Door Car17.269323NaN
....................................
7065LA72316NaNMBachelor23405.9879871941.073.0Personal AutoFour-Door Car198.234764California
7066PK87824NaNFCollege3096.51121721604.079.0Corporate AutoFour-Door Car379.200000California
7067TD14365NaNMBachelor8163.8904280.085.0Corporate AutoFour-Door Car790.784983California
7068UP19263NaNMCollege7524.44243621941.096.0Personal AutoFour-Door Car691.200000California
7069Y167826NaNMCollege2611.8368660.077.0Corporate AutoTwo-Door Car369.600000California
\n", + "

9135 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 NaN M Bachelor \n", + "7066 PK87824 NaN F College \n", + "7067 TD14365 NaN M Bachelor \n", + "7068 UP19263 NaN M College \n", + "7069 Y167826 NaN M College \n", + "\n", + " customer_lifetime_value income monthly_premium_auto policy_type \\\n", + "0 NaN 0.0 1000.0 Personal Auto \n", + "1 697953.59% 0.0 94.0 Personal Auto \n", + "2 1288743.17% 48767.0 108.0 Personal Auto \n", + "3 764586.18% 0.0 106.0 Corporate Auto \n", + "4 536307.65% 36357.0 68.0 Personal Auto \n", + "... ... ... ... ... \n", + "7065 23405.98798 71941.0 73.0 Personal Auto \n", + "7066 3096.511217 21604.0 79.0 Corporate Auto \n", + "7067 8163.890428 0.0 85.0 Corporate Auto \n", + "7068 7524.442436 21941.0 96.0 Personal Auto \n", + "7069 2611.836866 0.0 77.0 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount state \n", + "0 Four-Door Car 2.704934 NaN \n", + "1 Four-Door Car 1131.464935 NaN \n", + "2 Two-Door Car 566.472247 NaN \n", + "3 SUV 529.881344 NaN \n", + "4 Four-Door Car 17.269323 NaN \n", + "... ... ... ... \n", + "7065 Four-Door Car 198.234764 California \n", + "7066 Four-Door Car 379.200000 California \n", + "7067 Four-Door Car 790.784983 California \n", + "7068 Four-Door Car 691.200000 California \n", + "7069 Two-Door Car 369.600000 California \n", + "\n", + "[9135 rows x 11 columns]" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# CHECK FOR DUPLICATE ROWS IN THE DATA AND REMOVE IF ANY\n", + "\n", + "totaldata_data.duplicated() \n", + "\n", + "totaldata_data[totaldata_data.duplicated()] \n", + "\n", + "totaldata_data = totaldata_data.drop_duplicates()\n", + "\n", + "totaldata_data\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "90edf4ee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amountstate
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [customer, st, gender, education, customer_lifetime_value, income, monthly_premium_auto, number_of_open_complaints, policy_type, vehicle_class, total_claim_amount, state]\n", + "Index: []" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " # Filter out the data for customers who have an income of 0 or less.\n", + " \n", + "datatotal_data[datatotal_data['income'] < 0]\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}