diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb
index fbd46831..e0c23a00 100644
--- a/lab-dw-pandas.ipynb
+++ b/lab-dw-pandas.ipynb
@@ -82,12 +82,1201 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
+ "id": "d807707b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
"id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4",
"metadata": {},
"outputs": [],
"source": [
- "# Your code here"
+ "# Input the data set \n",
+ "\n",
+ "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv'\n",
+ "df = pd.read_csv(url)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "dbfafc8f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4003 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4004 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4006 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4007 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4008 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "4003 NaN NaN NaN NaN \n",
+ "4004 NaN NaN NaN NaN \n",
+ "4005 NaN NaN NaN NaN \n",
+ "4006 NaN NaN NaN NaN \n",
+ "4007 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "4003 NaN \n",
+ "4004 NaN \n",
+ "4005 NaN \n",
+ "4006 NaN \n",
+ "4007 NaN \n",
+ "\n",
+ "[4008 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f9273a62",
+ "metadata": {},
+ "source": [
+ "- Identify the dimensions of the dataset by determining the number of rows and columns it contains."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "424441f8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of rows: 4008\n",
+ "Number of columns: 11\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.shape\n",
+ "print(f\"Number of rows: {df.shape[0]}\")\n",
+ "print(f\"Number of columns: {df.shape[1]}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f2988285",
+ "metadata": {},
+ "source": [
+ "- Determine the data types of each column and evaluate whether they are appropriate for the nature of the variable. You should also provide suggestions for fixing any incorrect data types."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "02d94c0b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 4008 entries, 0 to 4007\n",
+ "Data columns (total 11 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Customer 1071 non-null object \n",
+ " 1 ST 1071 non-null object \n",
+ " 2 GENDER 954 non-null object \n",
+ " 3 Education 1071 non-null object \n",
+ " 4 Customer Lifetime Value 1068 non-null object \n",
+ " 5 Income 1071 non-null float64\n",
+ " 6 Monthly Premium Auto 1071 non-null float64\n",
+ " 7 Number of Open Complaints 1071 non-null object \n",
+ " 8 Policy Type 1071 non-null object \n",
+ " 9 Vehicle Class 1071 non-null object \n",
+ " 10 Total Claim Amount 1071 non-null float64\n",
+ "dtypes: float64(3), object(8)\n",
+ "memory usage: 344.6+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "1b2d3c0b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Customer object\n",
+ "ST object\n",
+ "GENDER object\n",
+ "Education object\n",
+ "Customer Lifetime Value object\n",
+ "Income float64\n",
+ "Monthly Premium Auto float64\n",
+ "Number of Open Complaints object\n",
+ "Policy Type object\n",
+ "Vehicle Class object\n",
+ "Total Claim Amount float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# types of data\n",
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4d35b079",
+ "metadata": {},
+ "source": [
+ "- Customer -> Object (Correct)\n",
+ "- ST -> Object (Correct)\n",
+ "- Gender -> Object (Correct)\n",
+ "- Education object (Correct)\n",
+ "- Customer Lifetime Value object (Not correct: the values are stored as strings with a trailing '%', e.g. 697953.59%). It should be converted to float, after stripping the '%', for later calculations\n",
+ "- Income float64 (Correct)\n",
+ "- Monthly Premium Auto float64 (Correct)\n",
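+ "- Number of Open Complaints object (Not correct: values look like 1/0/00; the count appears to be the middle number, which should be extracted and stored as an integer)\n",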
+ "- Policy Type object (Correct)\n",
+ "- Vehicle Class object (Correct)\n",
+ "- Total Claim Amount float64 (Correct)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "70db2edc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['1/0/00', '1/2/00', '1/1/00', '1/3/00', '1/5/00', '1/4/00', nan],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"Number of Open Complaints\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "ed13492d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.info"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "9f765875",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4003 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4004 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4006 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4007 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4008 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "4003 NaN NaN NaN NaN \n",
+ "4004 NaN NaN NaN NaN \n",
+ "4005 NaN NaN NaN NaN \n",
+ "4006 NaN NaN NaN NaN \n",
+ "4007 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "4003 NaN \n",
+ "4004 NaN \n",
+ "4005 NaN \n",
+ "4006 NaN \n",
+ "4007 NaN \n",
+ "\n",
+ "[4008 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe2c79f9",
+ "metadata": {},
+ "source": [
+ "- Identify the number of unique values for each column and determine which columns appear to be categorical. You should also describe the unique values of each categorical column and the range of values for numerical columns, and give your insights."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "39e32c3a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Customer 1071\n",
+ "ST 8\n",
+ "GENDER 5\n",
+ "Education 6\n",
+ "Customer Lifetime Value 1027\n",
+ "Income 774\n",
+ "Monthly Premium Auto 132\n",
+ "Number of Open Complaints 6\n",
+ "Policy Type 3\n",
+ "Vehicle Class 6\n",
+ "Total Claim Amount 761\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of unique values for each column\n",
+ "df.nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0efe1f2e",
+ "metadata": {},
+ "source": [
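+ "Columnas categóricas -> ST, GENDER, Education, Policy Type, Vehicle Class. Customer también es de tipo object, pero es un identificador único por cliente (1071 valores distintos), no una categoría."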
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "47e89b48",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['RB50392', 'QZ44356', 'AI49188', ..., 'CW49887', 'MY31220', nan],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"Customer\"].unique() # hace referencia al código del cliente y por tanto, hay uno diferente para cada uno"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "57fda049",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Washington', 'Arizona', 'Nevada', 'California', 'Oregon', 'Cali',\n",
+ " 'AZ', 'WA', nan], dtype=object)"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
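+ "df[\"ST\"].unique() # Estado en el que vive el cliente: hay 8 etiquetas, pero solo 5 estados distintos (Cali/California, AZ/Arizona y WA/Washington están duplicados por la notación)"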
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "31277ff8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([nan, 'F', 'M', 'Femal', 'Male', 'female'], dtype=object)"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"GENDER\"].unique() #Es el genero de la persona, deberia haber unicamente dos. Hay un problema con la notacion"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "a57af54f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Master', 'Bachelor', 'High School or Below', 'College',\n",
+ " 'Bachelors', 'Doctor', nan], dtype=object)"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"Education\"].unique() #hay un problema con la notacion tambien, hay repetidas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "e38bcbec",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Personal Auto', 'Corporate Auto', 'Special Auto', nan],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"Policy Type\"].unique() #tipo de poliza de seguro, hay 3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "d9167648",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Four-Door Car', 'Two-Door Car', 'SUV', 'Luxury SUV', 'Sports Car',\n",
+ " 'Luxury Car', nan], dtype=object)"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"Vehicle Class\"].unique() #tipo de vehiculo"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "03bd14c5",
+ "metadata": {},
+ "source": [
+ "- Compute summary statistics such as mean, median, mode, standard deviation, and quartiles to understand the central tendency and distribution of the data for numerical columns. You should also provide your conclusions based on these summary statistics."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f2bc8479",
+ "metadata": {},
+ "source": [
+ "El estudio descriptivo solo lo podemos hacer de las variables cuantitativas y no de las cualitativas. En el resumen podemos observar que la media de \"Income\" es 39295.701214 y su mediana 36234, con un máximo de 99960. El outlier más claro está en \"Monthly Premium Auto\": el máximo (35354) está muy lejos de la media (193.23), mientras que el 75% de los usuarios está por debajo de un valor mucho más pequeño (109.5)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "e51329d6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 1071.000000 | \n",
+ " 1071.000000 | \n",
+ " 1071.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 39295.701214 | \n",
+ " 193.234360 | \n",
+ " 404.986909 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 30469.427060 | \n",
+ " 1601.190369 | \n",
+ " 293.027260 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 0.000000 | \n",
+ " 61.000000 | \n",
+ " 0.382107 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 14072.000000 | \n",
+ " 68.000000 | \n",
+ " 202.157702 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 36234.000000 | \n",
+ " 83.000000 | \n",
+ " 354.729129 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 64631.000000 | \n",
+ " 109.500000 | \n",
+ " 532.800000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 99960.000000 | \n",
+ " 35354.000000 | \n",
+ " 2893.239678 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Income Monthly Premium Auto Total Claim Amount\n",
+ "count 1071.000000 1071.000000 1071.000000\n",
+ "mean 39295.701214 193.234360 404.986909\n",
+ "std 30469.427060 1601.190369 293.027260\n",
+ "min 0.000000 61.000000 0.382107\n",
+ "25% 14072.000000 68.000000 202.157702\n",
+ "50% 36234.000000 83.000000 354.729129\n",
+ "75% 64631.000000 109.500000 532.800000\n",
+ "max 99960.000000 35354.000000 2893.239678"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe(include=\"float\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c26659ba",
+ "metadata": {},
+ "source": [
+ "- Compute summary statistics for categorical columns and providing your conclusions based on these statistics."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "b967342f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ " 954 | \n",
+ " 1071 | \n",
+ " 1068 | \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " 1071 | \n",
+ " 8 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 1027 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " RB50392 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 445811.34% | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " 1 | \n",
+ " 320 | \n",
+ " 457 | \n",
+ " 324 | \n",
+ " 4 | \n",
+ " 830 | \n",
+ " 780 | \n",
+ " 576 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value \\\n",
+ "count 1071 1071 954 1071 1068 \n",
+ "unique 1071 8 5 6 1027 \n",
+ "top RB50392 Oregon F Bachelor 445811.34% \n",
+ "freq 1 320 457 324 4 \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \n",
+ "count 1071 1071 1071 \n",
+ "unique 6 3 6 \n",
+ "top 1/0/00 Personal Auto Four-Door Car \n",
+ "freq 830 780 576 "
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe(include = \"object\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fa700686",
+ "metadata": {},
+ "source": [
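+ "Los valores más frecuentes indican que el cliente típico es mujer (F, 457 de 954), tiene educación Bachelor (324 de 1071), su valor más habitual de quejas es 1/0/00 (830 de 1071; presumiblemente 0 quejas abiertas), posee un coche de cuatro puertas (576 de 1071) y su póliza es de tipo Personal Auto (780 de 1071)."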
]
},
{
@@ -116,12 +1305,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 43,
"id": "2dca5073-4520-4f42-9390-4b92733284ed",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "ST\n",
+ "AZ 25\n",
+ "WA 30\n",
+ "Washington 81\n",
+ "Nevada 98\n",
+ "Cali 120\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "less_common_location = pd.Series(df[\"ST\"])\n",
+ "top_5 = less_common_location.value_counts().sort_values(ascending=True)[:5]\n",
+ "top_5"
]
},
{
@@ -146,12 +1354,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 52,
"id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Policy Type\n",
+ "Personal Auto 780\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "policy_types = pd.Series(df[\"Policy Type\"]).value_counts().sort_values(ascending=False)\n",
+ "max_number_policies_sold = policy_types[:1]\n",
+ "max_number_policies_sold"
]
},
{
@@ -176,12 +1399,68 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 53,
+ "id": "fa0b637f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Personal Auto', 'Corporate Auto', 'Special Auto', nan],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"Policy Type\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
"id": "0c0563cf-6f8b-463d-a321-651a972f82e5",
"metadata": {},
"outputs": [],
"source": [
- "# Your code here"
+ "personal = df[df[\"Policy Type\"] == \"Personal Auto\"]\n",
+ "average_personal = personal[\"Income\"].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "6813e0b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "corporate = df[df[\"Policy Type\"] == \"Corporate Auto\"]\n",
+ "average_corporate = corporate[\"Income\"].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "56af80db",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Average income Personal Policy's costumer: 38180.69871794872\n",
+ "Average Income Corporate Policy's costumer: 41390.31196581197\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f\"Average income Personal Policy's costumer: {average_personal}\")\n",
+ "print(f\"Average Income Corporate Policy's costumer: {average_corporate}\")\n",
+ "\n",
+ "# En media, los ingresos de los usuarios que tienen una póliza corporativa son mayores que los de aquellos que tienen una personal"
]
},
{
@@ -224,15 +1503,823 @@
"*Hint 2: check `Boolean selection according to the values of a single column` in https://towardsdatascience.com/filtering-data-frames-in-pandas-b570b1f834b9*"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "5e04819e",
+ "metadata": {},
+ "source": [
+ "75th percentile of Total Claim Amount -> 532.800000"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 81,
+ "id": "6cc4cf9c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "532.8"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "quantile_75 = df[\"Total Claim Amount\"].quantile(0.75)\n",
+ "quantile_75"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
"id": "b731bca6-a760-4860-a27b-a33efa712ce0",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ " 954 | \n",
+ " 1071 | \n",
+ " 1068 | \n",
+ " 1071.000000 | \n",
+ " 1071.000000 | \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ " 1071.000000 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " 1071 | \n",
+ " 8 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 1027 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " RB50392 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 445811.34% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " 1 | \n",
+ " 320 | \n",
+ " 457 | \n",
+ " 324 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 830 | \n",
+ " 780 | \n",
+ " 576 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 39295.701214 | \n",
+ " 193.234360 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 404.986909 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 30469.427060 | \n",
+ " 1601.190369 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 293.027260 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 61.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.382107 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 14072.000000 | \n",
+ " 68.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 202.157702 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 36234.000000 | \n",
+ " 83.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 354.729129 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 64631.000000 | \n",
+ " 109.500000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 532.800000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 99960.000000 | \n",
+ " 35354.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2893.239678 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value \\\n",
+ "count 1071 1071 954 1071 1068 \n",
+ "unique 1071 8 5 6 1027 \n",
+ "top RB50392 Oregon F Bachelor 445811.34% \n",
+ "freq 1 320 457 324 4 \n",
+ "mean NaN NaN NaN NaN NaN \n",
+ "std NaN NaN NaN NaN NaN \n",
+ "min NaN NaN NaN NaN NaN \n",
+ "25% NaN NaN NaN NaN NaN \n",
+ "50% NaN NaN NaN NaN NaN \n",
+ "75% NaN NaN NaN NaN NaN \n",
+ "max NaN NaN NaN NaN NaN \n",
+ "\n",
+ " Income Monthly Premium Auto Number of Open Complaints \\\n",
+ "count 1071.000000 1071.000000 1071 \n",
+ "unique NaN NaN 6 \n",
+ "top NaN NaN 1/0/00 \n",
+ "freq NaN NaN 830 \n",
+ "mean 39295.701214 193.234360 NaN \n",
+ "std 30469.427060 1601.190369 NaN \n",
+ "min 0.000000 61.000000 NaN \n",
+ "25% 14072.000000 68.000000 NaN \n",
+ "50% 36234.000000 83.000000 NaN \n",
+ "75% 64631.000000 109.500000 NaN \n",
+ "max 99960.000000 35354.000000 NaN \n",
+ "\n",
+ " Policy Type Vehicle Class Total Claim Amount \n",
+ "count 1071 1071 1071.000000 \n",
+ "unique 3 6 NaN \n",
+ "top Personal Auto Four-Door Car NaN \n",
+ "freq 780 576 NaN \n",
+ "mean NaN NaN 404.986909 \n",
+ "std NaN NaN 293.027260 \n",
+ "min NaN NaN 0.382107 \n",
+ "25% NaN NaN 202.157702 \n",
+ "50% NaN NaN 354.729129 \n",
+ "75% NaN NaN 532.800000 \n",
+ "max NaN NaN 2893.239678 "
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "df.describe(include='all')"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "f5ee6f8a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " OE15005 | \n",
+ " Cali | \n",
+ " NaN | \n",
+ " College | \n",
+ " 394524.16% | \n",
+ " 28855.0 | \n",
+ " 101.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ " 647.442031 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " TZ98966 | \n",
+ " Nevada | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 245019.10% | \n",
+ " 0.0 | \n",
+ " 73.0 | \n",
+ " 1/3/00 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 554.376763 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " US89481 | \n",
+ " California | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 394637.21% | \n",
+ " 0.0 | \n",
+ " 111.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 799.200000 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1059 | \n",
+ " YG44474 | \n",
+ " Oregon | \n",
+ " M | \n",
+ " College | \n",
+ " 1401472.13% | \n",
+ " 54193.0 | \n",
+ " 117.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 720.752945 | \n",
+ "
\n",
+ " \n",
+ " 1061 | \n",
+ " RY92647 | \n",
+ " Cali | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1050677.17% | \n",
+ " 0.0 | \n",
+ " 92.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 546.524896 | \n",
+ "
\n",
+ " \n",
+ " 1068 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061.0 | \n",
+ " 88.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 633.600000 | \n",
+ "
\n",
+ " \n",
+ " 1069 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487.0 | \n",
+ " 114.0 | \n",
+ " 1/0/00 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ " 547.200000 | \n",
+ "
\n",
+ " \n",
+ " 1070 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230.0 | \n",
+ " 112.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 537.600000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
264 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value Income \\\n",
+ "1 QZ44356 Arizona F Bachelor 697953.59% 0.0 \n",
+ "2 AI49188 Nevada F Bachelor 1288743.17% 48767.0 \n",
+ "17 OE15005 Cali NaN College 394524.16% 28855.0 \n",
+ "23 TZ98966 Nevada NaN Bachelor 245019.10% 0.0 \n",
+ "26 US89481 California NaN Bachelor 394637.21% 0.0 \n",
+ "... ... ... ... ... ... ... \n",
+ "1059 YG44474 Oregon M College 1401472.13% 54193.0 \n",
+ "1061 RY92647 Cali F Bachelor 1050677.17% 0.0 \n",
+ "1068 GS98873 Arizona F Bachelor 323912.47% 16061.0 \n",
+ "1069 CW49887 California F Master 462680.11% 79487.0 \n",
+ "1070 MY31220 California F College 899704.02% 54230.0 \n",
+ "\n",
+ " Monthly Premium Auto Number of Open Complaints Policy Type \\\n",
+ "1 94.0 1/0/00 Personal Auto \n",
+ "2 108.0 1/0/00 Personal Auto \n",
+ "17 101.0 1/0/00 Personal Auto \n",
+ "23 73.0 1/3/00 Corporate Auto \n",
+ "26 111.0 1/0/00 Personal Auto \n",
+ "... ... ... ... \n",
+ "1059 117.0 1/0/00 Corporate Auto \n",
+ "1061 92.0 1/0/00 Personal Auto \n",
+ "1068 88.0 1/0/00 Personal Auto \n",
+ "1069 114.0 1/0/00 Special Auto \n",
+ "1070 112.0 1/0/00 Personal Auto \n",
+ "\n",
+ " Vehicle Class Total Claim Amount \n",
+ "1 Four-Door Car 1131.464935 \n",
+ "2 Two-Door Car 566.472247 \n",
+ "17 SUV 647.442031 \n",
+ "23 Four-Door Car 554.376763 \n",
+ "26 Four-Door Car 799.200000 \n",
+ "... ... ... \n",
+ "1059 SUV 720.752945 \n",
+ "1061 Four-Door Car 546.524896 \n",
+ "1068 Four-Door Car 633.600000 \n",
+ "1069 SUV 547.200000 \n",
+ "1070 Two-Door Car 537.600000 \n",
+ "\n",
+ "[264 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "max_claim = df[df[\"Total Claim Amount\"]>quantile_75]\n",
+ "max_claim  # customers whose Total Claim Amount is above the 75th percentile"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "id": "62d7c63e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 264 | \n",
+ " 264 | \n",
+ " 238 | \n",
+ " 264 | \n",
+ " 264 | \n",
+ " 264.000000 | \n",
+ " 264.000000 | \n",
+ " 264 | \n",
+ " 264 | \n",
+ " 264 | \n",
+ " 264.000000 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " 264 | \n",
+ " 7 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 256 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 6 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " QZ44356 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 578018.22% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " 1 | \n",
+ " 90 | \n",
+ " 115 | \n",
+ " 85 | \n",
+ " 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 206 | \n",
+ " 191 | \n",
+ " 101 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 23677.344697 | \n",
+ " 165.193182 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 782.228263 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 27013.483721 | \n",
+ " 623.930992 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 292.751640 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 63.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 537.600000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 99.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 606.521741 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 18807.000000 | \n",
+ " 114.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 679.597985 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 42423.750000 | \n",
+ " 133.250000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 851.400000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 99316.000000 | \n",
+ " 10202.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2893.239678 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value \\\n",
+ "count 264 264 238 264 264 \n",
+ "unique 264 7 5 5 256 \n",
+ "top QZ44356 Oregon F Bachelor 578018.22% \n",
+ "freq 1 90 115 85 3 \n",
+ "mean NaN NaN NaN NaN NaN \n",
+ "std NaN NaN NaN NaN NaN \n",
+ "min NaN NaN NaN NaN NaN \n",
+ "25% NaN NaN NaN NaN NaN \n",
+ "50% NaN NaN NaN NaN NaN \n",
+ "75% NaN NaN NaN NaN NaN \n",
+ "max NaN NaN NaN NaN NaN \n",
+ "\n",
+ " Income Monthly Premium Auto Number of Open Complaints \\\n",
+ "count 264.000000 264.000000 264 \n",
+ "unique NaN NaN 6 \n",
+ "top NaN NaN 1/0/00 \n",
+ "freq NaN NaN 206 \n",
+ "mean 23677.344697 165.193182 NaN \n",
+ "std 27013.483721 623.930992 NaN \n",
+ "min 0.000000 63.000000 NaN \n",
+ "25% 0.000000 99.000000 NaN \n",
+ "50% 18807.000000 114.000000 NaN \n",
+ "75% 42423.750000 133.250000 NaN \n",
+ "max 99316.000000 10202.000000 NaN \n",
+ "\n",
+ " Policy Type Vehicle Class Total Claim Amount \n",
+ "count 264 264 264.000000 \n",
+ "unique 3 6 NaN \n",
+ "top Personal Auto SUV NaN \n",
+ "freq 191 101 NaN \n",
+ "mean NaN NaN 782.228263 \n",
+ "std NaN NaN 292.751640 \n",
+ "min NaN NaN 537.600000 \n",
+ "25% NaN NaN 606.521741 \n",
+ "50% NaN NaN 679.597985 \n",
+ "75% NaN NaN 851.400000 \n",
+ "max NaN NaN 2893.239678 "
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "max_claim.describe(include=\"all\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "id": "ac8fbd36",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(264, 11)"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "max_claim.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "319705ac",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0fa81645",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
@@ -251,7 +2338,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.11.7"
}
},
"nbformat": 4,