From f748777d3d0d8b0dc028427a0dd0400acabc008a Mon Sep 17 00:00:00 2001 From: LeonardoPagliacci Date: Fri, 6 Oct 2023 14:25:14 +0100 Subject: [PATCH] added the ypnb file with the solution of the lab --- .DS_Store | Bin 0 -> 8196 bytes ...ound1] leonardo_pagliacci-checkpoint.ipynb | 1547 +++++++++++++++++ ..._analysis_round1] leonardo_pagliacci.ipynb | 1547 +++++++++++++++++ files_for_lab/.DS_Store | Bin 0 -> 6148 bytes 4 files changed, 3094 insertions(+) create mode 100644 .DS_Store create mode 100644 .ipynb_checkpoints/[lab_customer_analysis_round1] leonardo_pagliacci-checkpoint.ipynb create mode 100644 [lab_customer_analysis_round1] leonardo_pagliacci.ipynb create mode 100644 files_for_lab/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..202ba5f668f6f1543cb994fa5e7f7235d058bc74 GIT binary patch literal 8196 zcmeHMOOFyk5U$=u8M11~#@%o+nZ)Y`0gb!r1rZP4j2Jzv8=PTyOm>(iFi19pgo_t7 z{uQqtym;_0cr!8aclh}FfiNHhvx(WnF4Fxq)m7cqU-!&V0}-)O-^&wah{!?_nBGG5 z2NC_EEXan|>;M`3DQM;^HQw$lSQ>?4z%XDKFbo(53&*eC*v8pjIdtpk-z0e}gprJ$V_4WO;DsBx?iJrJf`fy$NX6oV;u=xuFR)HqhC z+==PrgXx)>&QO?~9pl^5oLEtzsSN{$fkg&H?w+75HOZ$amcL*4@NKrQ&C6vs;4XCH zQhW2g@#${7-&@(=Y0bPFxFoOAt{?3B2T(dR5?V;FfKU+NeZtfrAHw~EE1 zZaUkD$EOaRP)IISsfM`fsE?uJ=m{yJUOdV#1w^@)-)!te{Acx;?%}kyI8e@bvo!jf!;@{Ld&r~A$gRiBixq*SaNCAg^t6b1CJ*3 zKCp(5Kg;BUfYn@iU@`-QUms4OfgpjK>!{M=gG%H?{a(rG9*S>|IS<@P+i{=x^+7d#`5q4=#}DgW z>EL^Hm^^#xhaMl4ct7-d(zl{Itdy0irZdA~{vfw!7xoLIJ$rbofcA~+H%FtCwR82_ zok!KJqP S5E_Vi5g=(Wm0{qYGVlwaFCujS literal 0 HcmV?d00001 diff --git a/.ipynb_checkpoints/[lab_customer_analysis_round1] leonardo_pagliacci-checkpoint.ipynb b/.ipynb_checkpoints/[lab_customer_analysis_round1] leonardo_pagliacci-checkpoint.ipynb new file mode 100644 index 0000000..cb4aab0 --- /dev/null +++ b/.ipynb_checkpoints/[lab_customer_analysis_round1] leonardo_pagliacci-checkpoint.ipynb @@ -0,0 +1,1547 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 347, + "id": "958305e9", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n" + ] + }, + { + "cell_type": "code", + "execution_count": 348, + "id": "ff4186ae", + "metadata": {}, + "outputs": [], + "source": [ + "# Read the three files into python as dataframes\n", + "\n", + "cus_data1 = pd.read_csv(\"/Users/leozinho.air/Desktop/Ironhack_DA/class_03/Lab 1 - customer analysis/lab-customer-analysis-round-1/files_for_lab/csv_files/file1.csv\")\n", + "\n", + "cus_data2 = pd.read_csv(r\"/Users/leozinho.air/Desktop/Ironhack_DA/class_03/Lab 1 - customer analysis/lab-customer-analysis-round-1/files_for_lab/csv_files/file2.csv\")\n", + "\n", + "cus_data3 = pd.read_csv(r\"/Users/leozinho.air/Desktop/Ironhack_DA/class_03/Lab 1 - customer analysis/lab-customer-analysis-round-1/files_for_lab/csv_files/file3.csv\")\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 349, + "id": "35c9e5a3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4008, 11)\n", + "(996, 11)\n", + "(7070, 11)\n" + ] + } + ], + "source": [ + "# Show the DataFrame's shape.\n", + "\n", + "print(cus_data1.shape)\n", + "\n", + "print(cus_data2.shape)\n", + "\n", + "print(cus_data3.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 350, + "id": "29b4433c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueeducationgenderincomemonthly_premium_autonumber_of_open_complaintspolicy_typetotal_claim_amountvehicle_class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
....................................
7065LA72316California23405.987980BachelorM71941730Personal Auto198.234764Four-Door Car
7066PK87824California3096.511217CollegeF21604790Corporate Auto379.200000Four-Door Car
7067TD14365California8163.890428BachelorM0853Corporate Auto790.784983Four-Door Car
7068UP19263California7524.442436CollegeM21941960Personal Auto691.200000Four-Door Car
7069Y167826California2611.836866CollegeM0770Corporate Auto369.600000Two-Door Car
\n", + "

7070 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value education \\\n", + "0 SA25987 Washington 3479.137523 High School or Below \n", + "1 TB86706 Arizona 2502.637401 Master \n", + "2 ZL73902 Nevada 3265.156348 Bachelor \n", + "3 KX23516 California 4455.843406 High School or Below \n", + "4 FN77294 California 7704.958480 High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 California 23405.987980 Bachelor \n", + "7066 PK87824 California 3096.511217 College \n", + "7067 TD14365 California 8163.890428 Bachelor \n", + "7068 UP19263 California 7524.442436 College \n", + "7069 Y167826 California 2611.836866 College \n", + "\n", + " gender income monthly_premium_auto number_of_open_complaints \\\n", + "0 M 0 104 0 \n", + "1 M 0 66 0 \n", + "2 F 25820 82 0 \n", + "3 F 0 121 0 \n", + "4 M 30366 101 2 \n", + "... ... ... ... ... \n", + "7065 M 71941 73 0 \n", + "7066 F 21604 79 0 \n", + "7067 M 0 85 3 \n", + "7068 M 21941 96 0 \n", + "7069 M 0 77 0 \n", + "\n", + " policy_type total_claim_amount vehicle_class \n", + "0 Personal Auto 499.200000 Two-Door Car \n", + "1 Personal Auto 3.468912 Two-Door Car \n", + "2 Personal Auto 393.600000 Four-Door Car \n", + "3 Personal Auto 699.615192 SUV \n", + "4 Personal Auto 484.800000 SUV \n", + "... ... ... ... \n", + "7065 Personal Auto 198.234764 Four-Door Car \n", + "7066 Corporate Auto 379.200000 Four-Door Car \n", + "7067 Corporate Auto 790.784983 Four-Door Car \n", + "7068 Personal Auto 691.200000 Four-Door Car \n", + "7069 Corporate Auto 369.600000 Two-Door Car \n", + "\n", + "[7070 rows x 11 columns]" + ] + }, + "execution_count": 350, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Standardize header names.\n", + "\n", + "cols1 = []\n", + "\n", + "cols2 = []\n", + "\n", + "cols3 = []\n", + "\n", + "\n", + "for i in range(len(cus_data1.columns)):\n", + " cols1.append(cus_data1.columns[i].lower().replace(' ', '_'))\n", + "\n", + "cus_data1.columns = cols1\n", + "\n", + "cus_data1\n", + "\n", + "for i in range(len(cus_data2.columns)):\n", + " cols2.append(cus_data2.columns[i].lower().replace(' ', '_'))\n", + "\n", + "cus_data2.columns = cols2\n", + "\n", + "cus_data2\n", + "\n", + "for i in range(len(cus_data3.columns)):\n", + " cols3.append(cus_data3.columns[i].lower().replace(' ', '_'))\n", + "\n", + "cus_data3.columns = cols3\n", + "\n", + "cus_data3\n", + "\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 351, + "id": "39a98573", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n", + " dtype='object')\n", + "Index(['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n", + " dtype='object')\n", + "Index(['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "# Rearrange the columns in the dataframe as needed\n", + "\n", + "# We use the columns of cus_data1 as the default columns for ours dataframes\n", + "\n", + "cus_data2 = cus_data2[['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount']] # Changing the order of the columns\n", + "\n", + "cus_data2\n", + "\n", + "# Now i am going to do the same for cus_data3\n", + "\n", + "cus_data3['st'] = cus_data3['state']\n", + "cus_data3 = cus_data3.drop(columns ='state') # renaming the column 'state'\n", + "\n", + "cus_data3 # Thankfully renamed\n", + "\n", + "# Now let's order cus_data3\n", + "\n", + "desired_order = ['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount']\n", + "cus_data3 = cus_data3[desired_order]\n", + "\n", + "# Checking if everything is ok\n", + "print(cus_data3.columns)\n", + "print(cus_data2.columns)\n", + "print(cus_data1.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 352, + "id": "7aadaafc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
6970NA56063CaliforniaMCollege43019.012780.0132.00Corporate AutoSUV904.378268
6971CA62597CaliforniaFBachelor7482.8510410.0101.00Corporate AutoSUV978.257137
6972ZO23305CaliforniaMBachelor4929.54969925632.063.00Personal AutoTwo-Door Car351.270869
6973UD20999CaliforniaMMaster2537.6675110.068.00Corporate AutoFour-Door Car326.400000
6974FQ66351CaliforniaFBachelor5322.29168833681.0138.01Personal AutoSUV873.127345
....................................
7065LA72316CaliforniaMBachelor23405.9879871941.073.00Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaFCollege3096.51121721604.079.00Corporate AutoFour-Door Car379.200000
7067TD14365CaliforniaMBachelor8163.8904280.085.03Corporate AutoFour-Door Car790.784983
7068UP19263CaliforniaMCollege7524.44243621941.096.00Personal AutoFour-Door Car691.200000
7069Y167826CaliforniaMCollege2611.8368660.077.00Corporate AutoTwo-Door Car369.600000
\n", + "

100 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education customer_lifetime_value income \\\n", + "6970 NA56063 California M College 43019.01278 0.0 \n", + "6971 CA62597 California F Bachelor 7482.851041 0.0 \n", + "6972 ZO23305 California M Bachelor 4929.549699 25632.0 \n", + "6973 UD20999 California M Master 2537.667511 0.0 \n", + "6974 FQ66351 California F Bachelor 5322.291688 33681.0 \n", + "... ... ... ... ... ... ... \n", + "7065 LA72316 California M Bachelor 23405.98798 71941.0 \n", + "7066 PK87824 California F College 3096.511217 21604.0 \n", + "7067 TD14365 California M Bachelor 8163.890428 0.0 \n", + "7068 UP19263 California M College 7524.442436 21941.0 \n", + "7069 Y167826 California M College 2611.836866 0.0 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "6970 132.0 0 Corporate Auto \n", + "6971 101.0 0 Corporate Auto \n", + "6972 63.0 0 Personal Auto \n", + "6973 68.0 0 Corporate Auto \n", + "6974 138.0 1 Personal Auto \n", + "... ... ... ... \n", + "7065 73.0 0 Personal Auto \n", + "7066 79.0 0 Corporate Auto \n", + "7067 85.0 3 Corporate Auto \n", + "7068 96.0 0 Personal Auto \n", + "7069 77.0 0 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "6970 SUV 904.378268 \n", + "6971 SUV 978.257137 \n", + "6972 Two-Door Car 351.270869 \n", + "6973 Four-Door Car 326.400000 \n", + "6974 SUV 873.127345 \n", + "... ... ... \n", + "7065 Four-Door Car 198.234764 \n", + "7066 Four-Door Car 379.200000 \n", + "7067 Four-Door Car 790.784983 \n", + "7068 Four-Door Car 691.200000 \n", + "7069 Two-Door Car 369.600000 \n", + "\n", + "[100 rows x 11 columns]" + ] + }, + "execution_count": 352, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the three dataframes\n", + "\n", + "cus_data = pd.concat([cus_data1, cus_data2, cus_data3], axis = 0) # axis = 0 is not mandatory but is better putting it on\n", + "\n", + "cus_data.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": 353, + "id": "47da75af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"\\n\\n'income','monthly_premium_auto','total_claim_amount ' are numerical, the other ones are object (strings or whatsoever).\\n\\ncustomer object = Id referring to the customer\\nst object = State in which the car was bought\\ngender object = gender of the buyer\\neducation object = education of the buyer\\ncustomer_lifetime_value object = a metric that represents the total net profit a company can expect to generate from a customer throughout their entire relationship\\nincome float64 = annual income of the buyer\\nmonthly_premium_auto float64 = monthly premium that the buyer has to pay\\nnumber_of_open_complaints object = ...\\npolicy_type object = ...\\nvehicle_class object = ...\\ntotal_claim_amount float64 = ...\\n\\n\"" + ] + }, + "execution_count": 353, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Which columns are numerical?\n", + "# Which columns are categorial?\n", + "\n", + "cus_data.dtypes\n", + "\n", + "\n", + "'''\n", + "\n", + "'income','monthly_premium_auto','total_claim_amount ' are numerical, the other ones are object (strings or whatsoever).\n", + "\n", + "customer object = Id referring to the customer\n", + "st object = State in which the car was bought\n", + "gender object = gender of the buyer\n", + "education object = education of the buyer\n", + "customer_lifetime_value object = a metric that represents the total net profit a company can expect to generate from a customer throughout their entire relationship\n", + "income float64 = annual income of the buyer\n", + "monthly_premium_auto float64 = monthly premium that the buyer has to pay\n", + "number_of_open_complaints object = ...\n", + "policy_type object = ...\n", + "vehicle_class object = ...\n", + "total_claim_amount float64 = ...\n", + "\n", + "'''" + ] + }, + { + "cell_type": "code", + "execution_count": 354, + "id": "ef630730", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendercustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNNaN0.01000.0Personal AutoFour-Door Car2.704934
1QZ44356ArizonaF697953.59%0.094.0Personal AutoFour-Door Car1131.464935
2AI49188NevadaF1288743.17%48767.0108.0Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaM764586.18%0.0106.0Corporate AutoSUV529.881344
4GA49547WashingtonM536307.65%36357.068.0Personal AutoFour-Door Car17.269323
\n", + "
" + ], + "text/plain": [ + " customer st gender customer_lifetime_value income \\\n", + "0 RB50392 Washington NaN NaN 0.0 \n", + "1 QZ44356 Arizona F 697953.59% 0.0 \n", + "2 AI49188 Nevada F 1288743.17% 48767.0 \n", + "3 WW63253 California M 764586.18% 0.0 \n", + "4 GA49547 Washington M 536307.65% 36357.0 \n", + "\n", + " monthly_premium_auto policy_type vehicle_class total_claim_amount \n", + "0 1000.0 Personal Auto Four-Door Car 2.704934 \n", + "1 94.0 Personal Auto Four-Door Car 1131.464935 \n", + "2 108.0 Personal Auto Two-Door Car 566.472247 \n", + "3 106.0 Corporate Auto SUV 529.881344 \n", + "4 68.0 Personal Auto Four-Door Car 17.269323 " + ] + }, + "execution_count": 354, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Data cleaning\n", + "\n", + "# Delete the column education and the number of open complaints from the dataframe\n", + "\n", + "cus_data = cus_data.drop(['education'], axis = 1)\n", + "\n", + "cus_data = cus_data.drop(['number_of_open_complaints'], axis = 1)\n", + "\n", + "cus_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b78633cc", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 355, + "id": "331b02a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendercustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amount
1QZ44356ArizonaF6.979536e+070.094.0Personal AutoFour-Door Car1131.464935
2AI49188NevadaF1.288743e+0848767.0108.0Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaM7.645862e+070.0106.0Corporate AutoSUV529.881344
4GA49547WashingtonM5.363076e+0736357.068.0Personal AutoFour-Door Car17.269323
5OC83172OregonF8.256298e+0762902.069.0Personal AutoTwo-Door Car159.383042
..............................
7065LA72316CaliforniaM2.340599e+0671941.073.0Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaF3.096511e+0521604.079.0Corporate AutoFour-Door Car379.200000
7067TD14365CaliforniaM8.163890e+050.085.0Corporate AutoFour-Door Car790.784983
7068UP19263CaliforniaM7.524442e+0521941.096.0Personal AutoFour-Door Car691.200000
7069Y167826CaliforniaM2.611837e+050.077.0Corporate AutoTwo-Door Car369.600000
\n", + "

9010 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender customer_lifetime_value income \\\n", + "1 QZ44356 Arizona F 6.979536e+07 0.0 \n", + "2 AI49188 Nevada F 1.288743e+08 48767.0 \n", + "3 WW63253 California M 7.645862e+07 0.0 \n", + "4 GA49547 Washington M 5.363076e+07 36357.0 \n", + "5 OC83172 Oregon F 8.256298e+07 62902.0 \n", + "... ... ... ... ... ... \n", + "7065 LA72316 California M 2.340599e+06 71941.0 \n", + "7066 PK87824 California F 3.096511e+05 21604.0 \n", + "7067 TD14365 California M 8.163890e+05 0.0 \n", + "7068 UP19263 California M 7.524442e+05 21941.0 \n", + "7069 Y167826 California M 2.611837e+05 0.0 \n", + "\n", + " monthly_premium_auto policy_type vehicle_class total_claim_amount \n", + "1 94.0 Personal Auto Four-Door Car 1131.464935 \n", + "2 108.0 Personal Auto Two-Door Car 566.472247 \n", + "3 106.0 Corporate Auto SUV 529.881344 \n", + "4 68.0 Personal Auto Four-Door Car 17.269323 \n", + "5 69.0 Personal Auto Two-Door Car 159.383042 \n", + "... ... ... ... ... \n", + "7065 73.0 Personal Auto Four-Door Car 198.234764 \n", + "7066 79.0 Corporate Auto Four-Door Car 379.200000 \n", + "7067 85.0 Corporate Auto Four-Door Car 790.784983 \n", + "7068 96.0 Personal Auto Four-Door Car 691.200000 \n", + "7069 77.0 Corporate Auto Two-Door Car 369.600000 \n", + "\n", + "[9010 rows x 9 columns]" + ] + }, + "execution_count": 355, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Correct the values in the column customer lifetime value. \n", + "# They are given as a percent, so multiply them by 100 and change `dtype` to `numerical` type\n", + "\n", + "cus_data = cus_data.dropna() # removing null values\n", + "\n", + "cus_data['customer_lifetime_value'] = cus_data['customer_lifetime_value'].astype(str) # On cus_data3 (third df) the dtype of the customer lifetime column was 'float', then before cutting out the '%' character i had to convert the column to 'str' dtype\n", + "\n", + "cus_data['customer_lifetime_value'] = cus_data['customer_lifetime_value'].str.replace('%','') # Excluding the '%' character from the colum so that i can convert all the column to float.\n", + "\n", + "cus_data['customer_lifetime_value'] = cus_data['customer_lifetime_value'].astype(float) # Converting the column to float dtype.\n", + "\n", + "cus_data['customer_lifetime_value'] = cus_data['customer_lifetime_value'].mul(100) # Multiplying the values by 100, the numbers are very large. ex -> 6.979536e+07 = 69,795,360\n", + "\n", + "\n", + "cus_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 356, + "id": "e1a37433", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendercustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amount
1QZ44356ArizonaF6.979536e+070.094.0Personal AutoFour-Door Car1131.464935
2AI49188NevadaF1.288743e+0848767.0108.0Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaM7.645862e+070.0106.0Corporate AutoSUV529.881344
4GA49547WashingtonM5.363076e+0736357.068.0Personal AutoFour-Door Car17.269323
5OC83172OregonF8.256298e+0762902.069.0Personal AutoTwo-Door Car159.383042
..............................
7065LA72316CaliforniaM2.340599e+0671941.073.0Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaF3.096511e+0521604.079.0Corporate AutoFour-Door Car379.200000
7067TD14365CaliforniaM8.163890e+050.085.0Corporate AutoFour-Door Car790.784983
7068UP19263CaliforniaM7.524442e+0521941.096.0Personal AutoFour-Door Car691.200000
7069Y167826CaliforniaM2.611837e+050.077.0Corporate AutoTwo-Door Car369.600000
\n", + "

9007 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender customer_lifetime_value income \\\n", + "1 QZ44356 Arizona F 6.979536e+07 0.0 \n", + "2 AI49188 Nevada F 1.288743e+08 48767.0 \n", + "3 WW63253 California M 7.645862e+07 0.0 \n", + "4 GA49547 Washington M 5.363076e+07 36357.0 \n", + "5 OC83172 Oregon F 8.256298e+07 62902.0 \n", + "... ... ... ... ... ... \n", + "7065 LA72316 California M 2.340599e+06 71941.0 \n", + "7066 PK87824 California F 3.096511e+05 21604.0 \n", + "7067 TD14365 California M 8.163890e+05 0.0 \n", + "7068 UP19263 California M 7.524442e+05 21941.0 \n", + "7069 Y167826 California M 2.611837e+05 0.0 \n", + "\n", + " monthly_premium_auto policy_type vehicle_class total_claim_amount \n", + "1 94.0 Personal Auto Four-Door Car 1131.464935 \n", + "2 108.0 Personal Auto Two-Door Car 566.472247 \n", + "3 106.0 Corporate Auto SUV 529.881344 \n", + "4 68.0 Personal Auto Four-Door Car 17.269323 \n", + "5 69.0 Personal Auto Two-Door Car 159.383042 \n", + "... ... ... ... ... \n", + "7065 73.0 Personal Auto Four-Door Car 198.234764 \n", + "7066 79.0 Corporate Auto Four-Door Car 379.200000 \n", + "7067 85.0 Corporate Auto Four-Door Car 790.784983 \n", + "7068 96.0 Personal Auto Four-Door Car 691.200000 \n", + "7069 77.0 Corporate Auto Two-Door Car 369.600000 \n", + "\n", + "[9007 rows x 9 columns]" + ] + }, + "execution_count": 356, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check for duplicate rows in the data and remove if any.\n", + "\n", + "cus_data.duplicated()\n", + "\n", + "cus_data.drop_duplicates()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 357, + "id": "0135494b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendercustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amount
2AI49188NevadaF1.288743e+0848767.0108.0Personal AutoTwo-Door Car566.472247
4GA49547WashingtonM5.363076e+0736357.068.0Personal AutoFour-Door Car17.269323
5OC83172OregonF8.256298e+0762902.069.0Personal AutoTwo-Door Car159.383042
6XZ87318OregonF5.380899e+0755350.067.0Corporate AutoFour-Door Car321.600000
8DY87989OregonM2.412750e+0814072.071.0Corporate AutoFour-Door Car511.200000
..............................
7063TF56202CaliforniaM5.032165e+0566367.064.0Personal AutoTwo-Door Car307.200000
7064YM19146CaliforniaF4.100399e+0547761.0104.0Personal AutoFour-Door Car541.282007
7065LA72316CaliforniaM2.340599e+0671941.073.0Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaF3.096511e+0521604.079.0Corporate AutoFour-Door Car379.200000
7068UP19263CaliforniaM7.524442e+0521941.096.0Personal AutoFour-Door Car691.200000
\n", + "

6741 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender customer_lifetime_value income \\\n", + "2 AI49188 Nevada F 1.288743e+08 48767.0 \n", + "4 GA49547 Washington M 5.363076e+07 36357.0 \n", + "5 OC83172 Oregon F 8.256298e+07 62902.0 \n", + "6 XZ87318 Oregon F 5.380899e+07 55350.0 \n", + "8 DY87989 Oregon M 2.412750e+08 14072.0 \n", + "... ... ... ... ... ... \n", + "7063 TF56202 California M 5.032165e+05 66367.0 \n", + "7064 YM19146 California F 4.100399e+05 47761.0 \n", + "7065 LA72316 California M 2.340599e+06 71941.0 \n", + "7066 PK87824 California F 3.096511e+05 21604.0 \n", + "7068 UP19263 California M 7.524442e+05 21941.0 \n", + "\n", + " monthly_premium_auto policy_type vehicle_class total_claim_amount \n", + "2 108.0 Personal Auto Two-Door Car 566.472247 \n", + "4 68.0 Personal Auto Four-Door Car 17.269323 \n", + "5 69.0 Personal Auto Two-Door Car 159.383042 \n", + "6 67.0 Corporate Auto Four-Door Car 321.600000 \n", + "8 71.0 Corporate Auto Four-Door Car 511.200000 \n", + "... ... ... ... ... \n", + "7063 64.0 Personal Auto Two-Door Car 307.200000 \n", + "7064 104.0 Personal Auto Four-Door Car 541.282007 \n", + "7065 73.0 Personal Auto Four-Door Car 198.234764 \n", + "7066 79.0 Corporate Auto Four-Door Car 379.200000 \n", + "7068 96.0 Personal Auto Four-Door Car 691.200000 \n", + "\n", + "[6741 rows x 9 columns]" + ] + }, + "execution_count": 357, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter out the data for customers who have an income of 0 or less.\n", + "\n", + "cus_data = cus_data.loc[cus_data['income'] > 0]\n", + "\n", + "cus_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c6f4495", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d155737", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/[lab_customer_analysis_round1] leonardo_pagliacci.ipynb b/[lab_customer_analysis_round1] leonardo_pagliacci.ipynb new file mode 100644 index 0000000..cb4aab0 --- /dev/null +++ b/[lab_customer_analysis_round1] leonardo_pagliacci.ipynb @@ -0,0 +1,1547 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 347, + "id": "958305e9", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n" + ] + }, + { + "cell_type": "code", + "execution_count": 348, + "id": "ff4186ae", + "metadata": {}, + "outputs": [], + "source": [ + "# Read the three files into python as dataframes\n", + "\n", + "cus_data1 = pd.read_csv(\"/Users/leozinho.air/Desktop/Ironhack_DA/class_03/Lab 1 - customer analysis/lab-customer-analysis-round-1/files_for_lab/csv_files/file1.csv\")\n", + "\n", + "cus_data2 = pd.read_csv(r\"/Users/leozinho.air/Desktop/Ironhack_DA/class_03/Lab 1 - customer analysis/lab-customer-analysis-round-1/files_for_lab/csv_files/file2.csv\")\n", + "\n", + "cus_data3 = pd.read_csv(r\"/Users/leozinho.air/Desktop/Ironhack_DA/class_03/Lab 1 - customer analysis/lab-customer-analysis-round-1/files_for_lab/csv_files/file3.csv\")\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 349, + "id": "35c9e5a3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4008, 11)\n", + "(996, 11)\n", + "(7070, 11)\n" + ] + } + ], + "source": [ + "# Show the DataFrame's shape.\n", + "\n", + "print(cus_data1.shape)\n", + "\n", + "print(cus_data2.shape)\n", + "\n", + "print(cus_data3.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 350, + "id": "29b4433c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueeducationgenderincomemonthly_premium_autonumber_of_open_complaintspolicy_typetotal_claim_amountvehicle_class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
....................................
7065LA72316California23405.987980BachelorM71941730Personal Auto198.234764Four-Door Car
7066PK87824California3096.511217CollegeF21604790Corporate Auto379.200000Four-Door Car
7067TD14365California8163.890428BachelorM0853Corporate Auto790.784983Four-Door Car
7068UP19263California7524.442436CollegeM21941960Personal Auto691.200000Four-Door Car
7069Y167826California2611.836866CollegeM0770Corporate Auto369.600000Two-Door Car
\n", + "

7070 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value education \\\n", + "0 SA25987 Washington 3479.137523 High School or Below \n", + "1 TB86706 Arizona 2502.637401 Master \n", + "2 ZL73902 Nevada 3265.156348 Bachelor \n", + "3 KX23516 California 4455.843406 High School or Below \n", + "4 FN77294 California 7704.958480 High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 California 23405.987980 Bachelor \n", + "7066 PK87824 California 3096.511217 College \n", + "7067 TD14365 California 8163.890428 Bachelor \n", + "7068 UP19263 California 7524.442436 College \n", + "7069 Y167826 California 2611.836866 College \n", + "\n", + " gender income monthly_premium_auto number_of_open_complaints \\\n", + "0 M 0 104 0 \n", + "1 M 0 66 0 \n", + "2 F 25820 82 0 \n", + "3 F 0 121 0 \n", + "4 M 30366 101 2 \n", + "... ... ... ... ... \n", + "7065 M 71941 73 0 \n", + "7066 F 21604 79 0 \n", + "7067 M 0 85 3 \n", + "7068 M 21941 96 0 \n", + "7069 M 0 77 0 \n", + "\n", + " policy_type total_claim_amount vehicle_class \n", + "0 Personal Auto 499.200000 Two-Door Car \n", + "1 Personal Auto 3.468912 Two-Door Car \n", + "2 Personal Auto 393.600000 Four-Door Car \n", + "3 Personal Auto 699.615192 SUV \n", + "4 Personal Auto 484.800000 SUV \n", + "... ... ... ... \n", + "7065 Personal Auto 198.234764 Four-Door Car \n", + "7066 Corporate Auto 379.200000 Four-Door Car \n", + "7067 Corporate Auto 790.784983 Four-Door Car \n", + "7068 Personal Auto 691.200000 Four-Door Car \n", + "7069 Corporate Auto 369.600000 Two-Door Car \n", + "\n", + "[7070 rows x 11 columns]" + ] + }, + "execution_count": 350, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Standardize header names.\n", + "\n", + "cols1 = []\n", + "\n", + "cols2 = []\n", + "\n", + "cols3 = []\n", + "\n", + "\n", + "for i in range(len(cus_data1.columns)):\n", + " cols1.append(cus_data1.columns[i].lower().replace(' ', '_'))\n", + "\n", + "cus_data1.columns = cols1\n", + "\n", + "cus_data1\n", + "\n", + "for i in range(len(cus_data2.columns)):\n", + " cols2.append(cus_data2.columns[i].lower().replace(' ', '_'))\n", + "\n", + "cus_data2.columns = cols2\n", + "\n", + "cus_data2\n", + "\n", + "for i in range(len(cus_data3.columns)):\n", + " cols3.append(cus_data3.columns[i].lower().replace(' ', '_'))\n", + "\n", + "cus_data3.columns = cols3\n", + "\n", + "cus_data3\n", + "\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 351, + "id": "39a98573", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n", + " dtype='object')\n", + "Index(['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n", + " dtype='object')\n", + "Index(['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "# Rearrange the columns in the dataframe as needed\n", + "\n", + "# We use the columns of cus_data1 as the default columns for ours dataframes\n", + "\n", + "cus_data2 = cus_data2[['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount']] # Changing the order of the columns\n", + "\n", + "cus_data2\n", + "\n", + "# Now i am going to do the same for cus_data3\n", + "\n", + "cus_data3['st'] = cus_data3['state']\n", + "cus_data3 = cus_data3.drop(columns ='state') # renaming the column 'state'\n", + "\n", + "cus_data3 # Thankfully renamed\n", + "\n", + "# Now let's order cus_data3\n", + "\n", + "desired_order = ['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount']\n", + "cus_data3 = cus_data3[desired_order]\n", + "\n", + "# Checking if everything is ok\n", + "print(cus_data3.columns)\n", + "print(cus_data2.columns)\n", + "print(cus_data1.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 352, + "id": "7aadaafc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
6970NA56063CaliforniaMCollege43019.012780.0132.00Corporate AutoSUV904.378268
6971CA62597CaliforniaFBachelor7482.8510410.0101.00Corporate AutoSUV978.257137
6972ZO23305CaliforniaMBachelor4929.54969925632.063.00Personal AutoTwo-Door Car351.270869
6973UD20999CaliforniaMMaster2537.6675110.068.00Corporate AutoFour-Door Car326.400000
6974FQ66351CaliforniaFBachelor5322.29168833681.0138.01Personal AutoSUV873.127345
....................................
7065LA72316CaliforniaMBachelor23405.9879871941.073.00Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaFCollege3096.51121721604.079.00Corporate AutoFour-Door Car379.200000
7067TD14365CaliforniaMBachelor8163.8904280.085.03Corporate AutoFour-Door Car790.784983
7068UP19263CaliforniaMCollege7524.44243621941.096.00Personal AutoFour-Door Car691.200000
7069Y167826CaliforniaMCollege2611.8368660.077.00Corporate AutoTwo-Door Car369.600000
\n", + "

100 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender education customer_lifetime_value income \\\n", + "6970 NA56063 California M College 43019.01278 0.0 \n", + "6971 CA62597 California F Bachelor 7482.851041 0.0 \n", + "6972 ZO23305 California M Bachelor 4929.549699 25632.0 \n", + "6973 UD20999 California M Master 2537.667511 0.0 \n", + "6974 FQ66351 California F Bachelor 5322.291688 33681.0 \n", + "... ... ... ... ... ... ... \n", + "7065 LA72316 California M Bachelor 23405.98798 71941.0 \n", + "7066 PK87824 California F College 3096.511217 21604.0 \n", + "7067 TD14365 California M Bachelor 8163.890428 0.0 \n", + "7068 UP19263 California M College 7524.442436 21941.0 \n", + "7069 Y167826 California M College 2611.836866 0.0 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "6970 132.0 0 Corporate Auto \n", + "6971 101.0 0 Corporate Auto \n", + "6972 63.0 0 Personal Auto \n", + "6973 68.0 0 Corporate Auto \n", + "6974 138.0 1 Personal Auto \n", + "... ... ... ... \n", + "7065 73.0 0 Personal Auto \n", + "7066 79.0 0 Corporate Auto \n", + "7067 85.0 3 Corporate Auto \n", + "7068 96.0 0 Personal Auto \n", + "7069 77.0 0 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "6970 SUV 904.378268 \n", + "6971 SUV 978.257137 \n", + "6972 Two-Door Car 351.270869 \n", + "6973 Four-Door Car 326.400000 \n", + "6974 SUV 873.127345 \n", + "... ... ... \n", + "7065 Four-Door Car 198.234764 \n", + "7066 Four-Door Car 379.200000 \n", + "7067 Four-Door Car 790.784983 \n", + "7068 Four-Door Car 691.200000 \n", + "7069 Two-Door Car 369.600000 \n", + "\n", + "[100 rows x 11 columns]" + ] + }, + "execution_count": 352, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Concatenate the three dataframes\n", + "\n", + "cus_data = pd.concat([cus_data1, cus_data2, cus_data3], axis = 0) # axis = 0 is not mandatory but is better putting it on\n", + "\n", + "cus_data.tail(100)" + ] + }, + { + "cell_type": "code", + "execution_count": 353, + "id": "47da75af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"\\n\\n'income','monthly_premium_auto','total_claim_amount ' are numerical, the other ones are object (strings or whatsoever).\\n\\ncustomer object = Id referring to the customer\\nst object = State in which the car was bought\\ngender object = gender of the buyer\\neducation object = education of the buyer\\ncustomer_lifetime_value object = a metric that represents the total net profit a company can expect to generate from a customer throughout their entire relationship\\nincome float64 = annual income of the buyer\\nmonthly_premium_auto float64 = monthly premium that the buyer has to pay\\nnumber_of_open_complaints object = ...\\npolicy_type object = ...\\nvehicle_class object = ...\\ntotal_claim_amount float64 = ...\\n\\n\"" + ] + }, + "execution_count": 353, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Which columns are numerical?\n", + "# Which columns are categorial?\n", + "\n", + "cus_data.dtypes\n", + "\n", + "\n", + "'''\n", + "\n", + "'income','monthly_premium_auto','total_claim_amount ' are numerical, the other ones are object (strings or whatsoever).\n", + "\n", + "customer object = Id referring to the customer\n", + "st object = State in which the car was bought\n", + "gender object = gender of the buyer\n", + "education object = education of the buyer\n", + "customer_lifetime_value object = a metric that represents the total net profit a company can expect to generate from a customer throughout their entire relationship\n", + "income float64 = annual income of the buyer\n", + "monthly_premium_auto float64 = monthly premium that the buyer has to pay\n", + "number_of_open_complaints object = ...\n", + "policy_type object = ...\n", + "vehicle_class object = ...\n", + "total_claim_amount float64 = ...\n", + "\n", + "'''" + ] + }, + { + "cell_type": "code", + "execution_count": 354, + "id": "ef630730", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendercustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNNaN0.01000.0Personal AutoFour-Door Car2.704934
1QZ44356ArizonaF697953.59%0.094.0Personal AutoFour-Door Car1131.464935
2AI49188NevadaF1288743.17%48767.0108.0Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaM764586.18%0.0106.0Corporate AutoSUV529.881344
4GA49547WashingtonM536307.65%36357.068.0Personal AutoFour-Door Car17.269323
\n", + "
" + ], + "text/plain": [ + " customer st gender customer_lifetime_value income \\\n", + "0 RB50392 Washington NaN NaN 0.0 \n", + "1 QZ44356 Arizona F 697953.59% 0.0 \n", + "2 AI49188 Nevada F 1288743.17% 48767.0 \n", + "3 WW63253 California M 764586.18% 0.0 \n", + "4 GA49547 Washington M 536307.65% 36357.0 \n", + "\n", + " monthly_premium_auto policy_type vehicle_class total_claim_amount \n", + "0 1000.0 Personal Auto Four-Door Car 2.704934 \n", + "1 94.0 Personal Auto Four-Door Car 1131.464935 \n", + "2 108.0 Personal Auto Two-Door Car 566.472247 \n", + "3 106.0 Corporate Auto SUV 529.881344 \n", + "4 68.0 Personal Auto Four-Door Car 17.269323 " + ] + }, + "execution_count": 354, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Data cleaning\n", + "\n", + "# Delete the column education and the number of open complaints from the dataframe\n", + "\n", + "cus_data = cus_data.drop(['education'], axis = 1)\n", + "\n", + "cus_data = cus_data.drop(['number_of_open_complaints'], axis = 1)\n", + "\n", + "cus_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b78633cc", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 355, + "id": "331b02a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendercustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amount
1QZ44356ArizonaF6.979536e+070.094.0Personal AutoFour-Door Car1131.464935
2AI49188NevadaF1.288743e+0848767.0108.0Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaM7.645862e+070.0106.0Corporate AutoSUV529.881344
4GA49547WashingtonM5.363076e+0736357.068.0Personal AutoFour-Door Car17.269323
5OC83172OregonF8.256298e+0762902.069.0Personal AutoTwo-Door Car159.383042
..............................
7065LA72316CaliforniaM2.340599e+0671941.073.0Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaF3.096511e+0521604.079.0Corporate AutoFour-Door Car379.200000
7067TD14365CaliforniaM8.163890e+050.085.0Corporate AutoFour-Door Car790.784983
7068UP19263CaliforniaM7.524442e+0521941.096.0Personal AutoFour-Door Car691.200000
7069Y167826CaliforniaM2.611837e+050.077.0Corporate AutoTwo-Door Car369.600000
\n", + "

9010 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender customer_lifetime_value income \\\n", + "1 QZ44356 Arizona F 6.979536e+07 0.0 \n", + "2 AI49188 Nevada F 1.288743e+08 48767.0 \n", + "3 WW63253 California M 7.645862e+07 0.0 \n", + "4 GA49547 Washington M 5.363076e+07 36357.0 \n", + "5 OC83172 Oregon F 8.256298e+07 62902.0 \n", + "... ... ... ... ... ... \n", + "7065 LA72316 California M 2.340599e+06 71941.0 \n", + "7066 PK87824 California F 3.096511e+05 21604.0 \n", + "7067 TD14365 California M 8.163890e+05 0.0 \n", + "7068 UP19263 California M 7.524442e+05 21941.0 \n", + "7069 Y167826 California M 2.611837e+05 0.0 \n", + "\n", + " monthly_premium_auto policy_type vehicle_class total_claim_amount \n", + "1 94.0 Personal Auto Four-Door Car 1131.464935 \n", + "2 108.0 Personal Auto Two-Door Car 566.472247 \n", + "3 106.0 Corporate Auto SUV 529.881344 \n", + "4 68.0 Personal Auto Four-Door Car 17.269323 \n", + "5 69.0 Personal Auto Two-Door Car 159.383042 \n", + "... ... ... ... ... \n", + "7065 73.0 Personal Auto Four-Door Car 198.234764 \n", + "7066 79.0 Corporate Auto Four-Door Car 379.200000 \n", + "7067 85.0 Corporate Auto Four-Door Car 790.784983 \n", + "7068 96.0 Personal Auto Four-Door Car 691.200000 \n", + "7069 77.0 Corporate Auto Two-Door Car 369.600000 \n", + "\n", + "[9010 rows x 9 columns]" + ] + }, + "execution_count": 355, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Correct the values in the column customer lifetime value. \n", + "# They are given as a percent, so multiply them by 100 and change `dtype` to `numerical` type\n", + "\n", + "cus_data = cus_data.dropna() # removing null values\n", + "\n", + "cus_data['customer_lifetime_value'] = cus_data['customer_lifetime_value'].astype(str) # On cus_data3 (third df) the dtype of the customer lifetime column was 'float', then before cutting out the '%' character i had to convert the column to 'str' dtype\n", + "\n", + "cus_data['customer_lifetime_value'] = cus_data['customer_lifetime_value'].str.replace('%','') # Excluding the '%' character from the colum so that i can convert all the column to float.\n", + "\n", + "cus_data['customer_lifetime_value'] = cus_data['customer_lifetime_value'].astype(float) # Converting the column to float dtype.\n", + "\n", + "cus_data['customer_lifetime_value'] = cus_data['customer_lifetime_value'].mul(100) # Multiplying the values by 100, the numbers are very large. ex -> 6.979536e+07 = 69,795,360\n", + "\n", + "\n", + "cus_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 356, + "id": "e1a37433", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendercustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amount
1QZ44356ArizonaF6.979536e+070.094.0Personal AutoFour-Door Car1131.464935
2AI49188NevadaF1.288743e+0848767.0108.0Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaM7.645862e+070.0106.0Corporate AutoSUV529.881344
4GA49547WashingtonM5.363076e+0736357.068.0Personal AutoFour-Door Car17.269323
5OC83172OregonF8.256298e+0762902.069.0Personal AutoTwo-Door Car159.383042
..............................
7065LA72316CaliforniaM2.340599e+0671941.073.0Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaF3.096511e+0521604.079.0Corporate AutoFour-Door Car379.200000
7067TD14365CaliforniaM8.163890e+050.085.0Corporate AutoFour-Door Car790.784983
7068UP19263CaliforniaM7.524442e+0521941.096.0Personal AutoFour-Door Car691.200000
7069Y167826CaliforniaM2.611837e+050.077.0Corporate AutoTwo-Door Car369.600000
\n", + "

9007 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender customer_lifetime_value income \\\n", + "1 QZ44356 Arizona F 6.979536e+07 0.0 \n", + "2 AI49188 Nevada F 1.288743e+08 48767.0 \n", + "3 WW63253 California M 7.645862e+07 0.0 \n", + "4 GA49547 Washington M 5.363076e+07 36357.0 \n", + "5 OC83172 Oregon F 8.256298e+07 62902.0 \n", + "... ... ... ... ... ... \n", + "7065 LA72316 California M 2.340599e+06 71941.0 \n", + "7066 PK87824 California F 3.096511e+05 21604.0 \n", + "7067 TD14365 California M 8.163890e+05 0.0 \n", + "7068 UP19263 California M 7.524442e+05 21941.0 \n", + "7069 Y167826 California M 2.611837e+05 0.0 \n", + "\n", + " monthly_premium_auto policy_type vehicle_class total_claim_amount \n", + "1 94.0 Personal Auto Four-Door Car 1131.464935 \n", + "2 108.0 Personal Auto Two-Door Car 566.472247 \n", + "3 106.0 Corporate Auto SUV 529.881344 \n", + "4 68.0 Personal Auto Four-Door Car 17.269323 \n", + "5 69.0 Personal Auto Two-Door Car 159.383042 \n", + "... ... ... ... ... \n", + "7065 73.0 Personal Auto Four-Door Car 198.234764 \n", + "7066 79.0 Corporate Auto Four-Door Car 379.200000 \n", + "7067 85.0 Corporate Auto Four-Door Car 790.784983 \n", + "7068 96.0 Personal Auto Four-Door Car 691.200000 \n", + "7069 77.0 Corporate Auto Two-Door Car 369.600000 \n", + "\n", + "[9007 rows x 9 columns]" + ] + }, + "execution_count": 356, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check for duplicate rows in the data and remove if any.\n", + "\n", + "cus_data.duplicated()\n", + "\n", + "cus_data.drop_duplicates()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 357, + "id": "0135494b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendercustomer_lifetime_valueincomemonthly_premium_autopolicy_typevehicle_classtotal_claim_amount
2AI49188NevadaF1.288743e+0848767.0108.0Personal AutoTwo-Door Car566.472247
4GA49547WashingtonM5.363076e+0736357.068.0Personal AutoFour-Door Car17.269323
5OC83172OregonF8.256298e+0762902.069.0Personal AutoTwo-Door Car159.383042
6XZ87318OregonF5.380899e+0755350.067.0Corporate AutoFour-Door Car321.600000
8DY87989OregonM2.412750e+0814072.071.0Corporate AutoFour-Door Car511.200000
..............................
7063TF56202CaliforniaM5.032165e+0566367.064.0Personal AutoTwo-Door Car307.200000
7064YM19146CaliforniaF4.100399e+0547761.0104.0Personal AutoFour-Door Car541.282007
7065LA72316CaliforniaM2.340599e+0671941.073.0Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaF3.096511e+0521604.079.0Corporate AutoFour-Door Car379.200000
7068UP19263CaliforniaM7.524442e+0521941.096.0Personal AutoFour-Door Car691.200000
\n", + "

6741 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " customer st gender customer_lifetime_value income \\\n", + "2 AI49188 Nevada F 1.288743e+08 48767.0 \n", + "4 GA49547 Washington M 5.363076e+07 36357.0 \n", + "5 OC83172 Oregon F 8.256298e+07 62902.0 \n", + "6 XZ87318 Oregon F 5.380899e+07 55350.0 \n", + "8 DY87989 Oregon M 2.412750e+08 14072.0 \n", + "... ... ... ... ... ... \n", + "7063 TF56202 California M 5.032165e+05 66367.0 \n", + "7064 YM19146 California F 4.100399e+05 47761.0 \n", + "7065 LA72316 California M 2.340599e+06 71941.0 \n", + "7066 PK87824 California F 3.096511e+05 21604.0 \n", + "7068 UP19263 California M 7.524442e+05 21941.0 \n", + "\n", + " monthly_premium_auto policy_type vehicle_class total_claim_amount \n", + "2 108.0 Personal Auto Two-Door Car 566.472247 \n", + "4 68.0 Personal Auto Four-Door Car 17.269323 \n", + "5 69.0 Personal Auto Two-Door Car 159.383042 \n", + "6 67.0 Corporate Auto Four-Door Car 321.600000 \n", + "8 71.0 Corporate Auto Four-Door Car 511.200000 \n", + "... ... ... ... ... \n", + "7063 64.0 Personal Auto Two-Door Car 307.200000 \n", + "7064 104.0 Personal Auto Four-Door Car 541.282007 \n", + "7065 73.0 Personal Auto Four-Door Car 198.234764 \n", + "7066 79.0 Corporate Auto Four-Door Car 379.200000 \n", + "7068 96.0 Personal Auto Four-Door Car 691.200000 \n", + "\n", + "[6741 rows x 9 columns]" + ] + }, + "execution_count": 357, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter out the data for customers who have an income of 0 or less.\n", + "\n", + "cus_data = cus_data.loc[cus_data['income'] > 0]\n", + "\n", + "cus_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c6f4495", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d155737", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/files_for_lab/.DS_Store b/files_for_lab/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2e7d3e5c175756d42469c962e77a19feafbe1907 GIT binary patch literal 6148 zcmeHKL5tHs6n?XG;| z`zIE>dR6e^!J}vYi0FGWQ%G&XdJ`r-?W59c$ySvuB?kKQ;6)2!3^&NgcGOP4qKCU5fB&Y_q& znVU`1zB_qISI>lqyru7Y&%)_%FBbk^Mvf1Qced#PO2#{KCP^fc1MH=l zH8Fq3n9%?;S(#CG6C;c7Kn3f9WVK>;E1BhA&#KicW|v?8yv%OJxWx>gAH9-g{sSMP zDu;h$nj(v)-)r71HX7e3U=%P4paQf$SQ>4Oxk9;gU?qwt)MbnI z|Hswm|5cK?G71<4E|dbSK5zzooRZ#K*Ct1Mt%c$ X4x+6wSBNcW=0`xvU@D`)KULs2Fcbx$ literal 0 HcmV?d00001