From 4f30e7334378707fd527dcde227e8595688793b4 Mon Sep 17 00:00:00 2001 From: faezakamran Date: Sun, 22 Aug 2021 16:14:47 +0500 Subject: [PATCH] Pclass wise average age calculations added --- titanic-dataset-eda.ipynb | 1469 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1469 insertions(+) create mode 100644 titanic-dataset-eda.ipynb diff --git a/titanic-dataset-eda.ipynb b/titanic-dataset-eda.ipynb new file mode 100644 index 0000000..0d91c19 --- /dev/null +++ b/titanic-dataset-eda.ipynb @@ -0,0 +1,1469 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "c3712364", + "metadata": { + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", + "execution": { + "iopub.execute_input": "2021-08-20T01:43:14.788930Z", + "iopub.status.busy": "2021-08-20T01:43:14.788029Z", + "iopub.status.idle": "2021-08-20T01:43:15.691606Z", + "shell.execute_reply": "2021-08-20T01:43:15.690844Z", + "shell.execute_reply.started": "2021-08-20T00:57:12.467616Z" + }, + "papermill": { + "duration": 0.937281, + "end_time": "2021-08-20T01:43:15.691766", + "exception": false, + "start_time": "2021-08-20T01:43:14.754485", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/kaggle/input/titanic/train.csv\n", + "/kaggle/input/titanic/test.csv\n", + "/kaggle/input/titanic/gender_submission.csv\n" + ] + } + ], + "source": [ + "# This Python 3 environment comes with many helpful analytics libraries installed\n", + "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n", + "# For example, here's several helpful packages to load\n", + "\n", + "import numpy as np # linear algebra\n", + "import pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "# Input data files are available in the read-only \"../input/\" directory\n", + "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", + "\n", + "import os\n", + "for dirname, _, filenames in os.walk('/kaggle/input'):\n", + " for filename in filenames:\n", + " print(os.path.join(dirname, filename))\n", + "\n", + "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n", + "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9b7d1704", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:15.735561Z", + "iopub.status.busy": "2021-08-20T01:43:15.734799Z", + "iopub.status.idle": "2021-08-20T01:43:15.780302Z", + "shell.execute_reply": "2021-08-20T01:43:15.779580Z", + "shell.execute_reply.started": "2021-08-20T00:57:12.477632Z" + }, + "papermill": { + "duration": 0.069009, + "end_time": "2021-08-20T01:43:15.780451", + "exception": false, + "start_time": "2021-08-20T01:43:15.711442", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train = pd.read_csv('/kaggle/input/titanic/train.csv')\n", + "train.head()" + ] + }, + { + "cell_type": "markdown", + "id": "14f9b2d2", + "metadata": { + "papermill": { + "duration": 0.01882, + "end_time": "2021-08-20T01:43:15.820403", + "exception": false, + "start_time": "2021-08-20T01:43:15.801583", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Missing Values" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b2bdbfbd", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:15.861647Z", + "iopub.status.busy": "2021-08-20T01:43:15.861010Z", + "iopub.status.idle": "2021-08-20T01:43:15.885804Z", + "shell.execute_reply": "2021-08-20T01:43:15.886432Z", + "shell.execute_reply.started": "2021-08-20T00:57:12.536532Z" + }, + "papermill": { + "duration": 0.047108, + "end_time": "2021-08-20T01:43:15.886607", + "exception": false, + "start_time": "2021-08-20T01:43:15.839499", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
1FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
3FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
.......................................
886FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
887FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
888FalseFalseFalseFalseFalseTrueFalseFalseFalseFalseTrueFalse
889FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
890FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
\n", + "

891 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket \\\n", + "0 False False False False False False False False False \n", + "1 False False False False False False False False False \n", + "2 False False False False False False False False False \n", + "3 False False False False False False False False False \n", + "4 False False False False False False False False False \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "886 False False False False False False False False False \n", + "887 False False False False False False False False False \n", + "888 False False False False False True False False False \n", + "889 False False False False False False False False False \n", + "890 False False False False False False False False False \n", + "\n", + " Fare Cabin Embarked \n", + "0 False True False \n", + "1 False False False \n", + "2 False True False \n", + "3 False False False \n", + "4 False True False \n", + ".. ... ... ... \n", + "886 False True False \n", + "887 False False False \n", + "888 False True False \n", + "889 False False False \n", + "890 False True False \n", + "\n", + "[891 rows x 12 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0c42329f", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:15.930200Z", + "iopub.status.busy": "2021-08-20T01:43:15.929563Z", + "iopub.status.idle": "2021-08-20T01:43:16.150878Z", + "shell.execute_reply": "2021-08-20T01:43:16.149935Z", + "shell.execute_reply.started": "2021-08-20T00:57:12.569288Z" + }, + "papermill": { + "duration": 0.244178, + "end_time": "2021-08-20T01:43:16.151024", + "exception": false, + "start_time": "2021-08-20T01:43:15.906846", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + 
"metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(train.isnull(), yticklabels=False, cbar=False, cmap='viridis')" + ] + }, + { + "cell_type": "markdown", + "id": "8dbb9133", + "metadata": { + "papermill": { + "duration": 0.020473, + "end_time": "2021-08-20T01:43:16.192297", + "exception": false, + "start_time": "2021-08-20T01:43:16.171824", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "\n", + "Roughly 20 percent of the Age data is missing. The proportion of Age missing is likely small enough for reasonable replacement with some form of imputation. Looking at the Cabin column, it looks like we are just missing too much of that data to do something useful with at a basic level. We'll probably drop this later, or change it to another feature like \"Cabin Known: 1 or 0\"\n", + "\n", + "Let's continue on by visualizing some more of the data! Check out the video for full explanations over these plots, this code is just to serve as reference." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2196905d", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:16.240706Z", + "iopub.status.busy": "2021-08-20T01:43:16.240095Z", + "iopub.status.idle": "2021-08-20T01:43:16.391994Z", + "shell.execute_reply": "2021-08-20T01:43:16.391371Z", + "shell.execute_reply.started": "2021-08-20T00:57:12.822743Z" + }, + "papermill": { + "duration": 0.179036, + "end_time": "2021-08-20T01:43:16.392141", + "exception": false, + "start_time": "2021-08-20T01:43:16.213105", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAUfklEQVR4nO3dfUyV9/3/8dcpp1CCgELknGr9LXHaSNQNs3V40kbnYUcsyEQqc3NjSrt1W6wO3eh0TS2rlfXGKemWbSEmju6bbvuWcNMNHShW6LY6E5V5k9NmtSG1jeccw4140x3geL5/mH1+tRV67OHiIDwff8F1znWdN+bCJ+cD5zq2cDgcFgAAku6I9QAAgLGDKAAADKIAADCIAgDAIAoAAMMe6wGi0dHRoYSEhFiPAQC3lWAwqKysrJvedltHISEhQZmZmbEeAwBuK16vd8jbWD4CABhEAQBgEAUAgEEUAAAGUQAAGEQBAGAQBQCAQRQAAAZRAAAYEz4KwYFQrEfAGMR5gYnqtr7MxUhIuDNOXyh/KdZjYIw59sK3Yz0CEBMT/pkCAOD/IwoAAIMoAAAMogAAMIgCAMAgCgAAgygAAAyiAAAwiAIAwCAKAACDKAAADKIAADCIAgDAsPQqqW63W0lJSbrjjjsUFxenuro69fb2atOmTXr//fc1ffp0VVVVKTU1VeFwWDt27FBbW5vuuusuPfvss5o7d66V4wEAPsLyZwo1NTVqbGxUXV2dJKm6uloul0stLS1yuVyqrq6WJLW3t6uzs1MtLS3avn27KioqrB4NAPARo7581NraqsLCQklSYWGhDh48eMN2m82mrKws9fX1KRAIjPZ4ADChWf4mO4888ohsNptWr16t1atXq6urSxkZGZKkqVOnqqurS5Lk9/vldDrNfk6nU36/39z3ZoLBoLxeb1TzZWZmRrU/xq9ozy3gdmRpFP7whz/I4XCoq6tLpaWlmjlz5g2322w22Wy2T338hIQE/lOHZTi3MF4N9wOPpctHDodDkpSeni6Px6OTJ08qPT3dLAsFAgGlpaWZ+/p8PrOvz+cz+wMARodlUbh69aouX75sPv773/+u2bNny+12q6GhQZLU0NCgnJwcSTLbw+GwOjo6lJycPOzSEQBg5Fm2fNTV1aX169dLkkKhkJYvX65FixZp/vz5KisrU21traZNm6aqqipJ0uLFi9XW1iaPx6PExERVVlZaNRo
AYAi2cDgcjvUQn5bX6x2Rdd8vlL80AtNgPDn2wrdjPQJgmeH+7+QVzQAAgygAAAyiAAAwiAIAwCAKAACDKAAADKIAADCIAgDAIAoAAIMoAAAMogAAMIgCAMAgCgAAgygAAAyiAAAwiAIAwCAKAACDKAAADKIAADCIAgDAIAoAAIMoAAAMogAAMIgCAMAgCgAAgygAAAyiAAAwiAIAwCAKAADD8iiEQiEVFhbqe9/7niTp3LlzKi4ulsfjUVlZmfr7+yVJ/f39Kisrk8fjUXFxsd577z2rRwMAfITlUXjppZf02c9+1ny+c+dOrVu3TgcOHFBKSopqa2slSa+88opSUlJ04MABrVu3Tjt37rR6NADAR1gaBZ/Pp8OHD2vVqlWSpHA4rCNHjig3N1eStHLlSrW2tkqSDh06pJUrV0qScnNz9cYbbygcDls5HgDgI+xWHryyslLl5eW6cuWKJKmnp0cpKSmy268/rNPplN/vlyT5/X7dfffd14ey25WcnKyenh6lpaUNefxgMCiv1xvVjJmZmVHtj/Er2nMLuB1ZFoXXXntNaWlpmjdvnv75z39a8hgJCQn8pw7LcG5hvBruBx7LonD8+HEdOnRI7e3tCgaDunz5snbs2KG+vj4NDg7KbrfL5/PJ4XBIkhwOh86fPy+n06nBwUFdunRJU6ZMsWo8AMBNWPY7hR/96Edqb2/XoUOHtGvXLi1cuFC/+MUvlJ2drebmZklSfX293G63JMntdqu+vl6S1NzcrIULF8pms1k1HgDgJkb9dQrl5eXau3evPB6Pent7VVxcLElatWqVent75fF4tHfvXv34xz8e7dEAYMKzhW/jP/Hxer0jsu77hfKXRmAajCfHXvh2rEcALDPc/528ohkAYBAFAIBBFAAABlEAABhEAQBgEAUAgEEUAAAGUQAAGEQBAGAQBQCAQRQAAAZRAAAYRAEAYBAFAIBBFAAABlEAABhEARijwoPBWI+AMcjq88Ju6dEBfGo2e4LefXp+rMfAGPP/tp2y9Pg8UwAAGEQBAGAQBQCAQRQAAAZRAAAYRAEAYBAFAIARURTWrl0b0TYAwO1t2BevBYNBffDBB+rp6dHFixcVDoclSZcvX5bf7x+VAQEAo2fYKPzxj39UTU2NAoGAioqKTBQmTZqkb33rW6MyIABg9AwbhbVr12rt2rX6/e9/r5KSkls6cDAY1De/+U319/crFAopNzdXGzdu1Llz57R582b19vZq7ty5ev755xUfH6/+/n49/vjjOnPmjCZPnqzdu3frnnvuieqLAwDcmoiufVRSUqLjx4/r/fffVygUMtsLCwuH3Cc+Pl41NTVKSkrSwMCA1qxZo0WLFmnv3r1at26d8vPztW3bNtXW1mrNmjV65ZVXlJKSogMHDqipqUk7d+5UVVVVtF8fAOAWRPSL5vLycj3//PM6duyYTp06pVOnTun06dPD7mOz2ZSUlCRJGhwc1ODgoGw2m44cOaLc3FxJ0sqVK9Xa2ipJOnTokFauXClJys3N1RtvvGGWqwAAoyOiZwqnT5/Wvn37ZLPZbungoVBIRUVFevfdd7VmzRrNmDFDKSkpstuvP6zT6TS/sPb7/br77ruvD2W3Kzk5WT09PUpLS7ulxwQAfHoRRWH27Nm6cOGCMjIybungcXFxamxsVF9fn9avX6933nnnUw05lGAwKK/XG9UxMjMzR2gajDfRnlvR4tzEUKw8NyOKQk9Pj/Lz8/W5z31Od955p9n+29/+NqIHSUlJUXZ2tjo6OtTX16fBwUHZ7Xb5fD45HA5JksPh0Pnz5+V0OjU4OKhLly5pypQpwx43ISGBbxxYhnMLY1W05+ZwUYkoChs2bLjlB+3u7pbdbldKSor+85//6B//+Ie++93vKjs7W83NzcrPz1d9fb3cbrckye12q76+XgsWLFBzc7MWLlx4y8tVAIDoRBSFL33pS7d84EAgoC1btigUCikcDmvZsmVasmSJZs2apU2bNqm
qqkqZmZkqLi6WJK1atUrl5eXyeDxKTU3V7t27b/kxAQDRiSgKCxYsMD+1DwwMaHBwUImJiTp+/PiQ+8yZM0cNDQ0f2z5jxgzV1tZ+bHtCQoJefPHFCMcGAFghoiicOHHCfBwOh9Xa2qqOjg6rZgIAxMgtXyXVZrPpK1/5iv72t79ZMQ8AIIYieqbQ0tJiPr527ZpOnz6thIQEy4YCAMRGRFF47bXXzMdxcXGaPn26fv3rX1s2FAAgNiKKws9//nOr5wAAjAER/U7B5/Np/fr1crlccrlc2rBhg3w+n9WzAQBGWURR2Lp1q9xut15//XW9/vrrWrJkibZu3Wr1bACAURZRFLq7u/XQQw/JbrfLbrerqKhI3d3dVs8GABhlEUVh8uTJamxsVCgUUigUUmNjoyZPnmzxaACA0RZRFCorK7V//37df//9euCBB9Tc3Kxnn33W6tkAAKMsor8+evHFF/Xcc88pNTVVktTb26vnnnuOv0oCgHEmomcKb731lgmCdH05KdbXmgcAjLyIonDt2jVdvHjRfN7b23vDezUDAMaHiJaPHn74Ya1evVrLli2TJP31r3/V97//fUsHAwCMvoiiUFhYqHnz5unIkSOSpF/96leaNWuWpYMBAEZfRFGQpFmzZhECABjnbvnS2QCA8YsoAAAMogAAMIgCAMAgCgAAgygAAAyiAAAwiAIAwCAKAACDKAAADKIAADCIAgDAIAoAAMOyKJw/f14lJSXKy8tTfn6+ampqJF1/g57S0lItXbpUpaWl5s17wuGwnnnmGXk8HhUUFOjMmTNWjQYAGIJlUYiLi9OWLVu0b98+/elPf9LLL7+st99+W9XV1XK5XGppaZHL5VJ1dbUkqb29XZ2dnWppadH27dtVUVFh1WgAgCFYFoWMjAzNnTtXkjRp0iTNnDlTfr9fra2tKiwslHT9zXsOHjwoSWa7zWZTVlaW+vr6FAgErBoPAHATEb/JTjTee+89eb1eff7zn1dXV5cyMjIkSVOnTlVXV5ckye/3y+l0mn2cTqf8fr+5780Eg0F5vd6oZsvMzIxqf4xf0Z5b0eLcxFCsPDctj8KVK1e0ceNG/fSnP9WkSZNuuM1ms8lms33qYyckJPCNA8twbmGsivbcHC4qlv710cDAgDZu3KiCggItXbpUkpSenm6WhQKBgNLS0iRJDodDPp/P7Ovz+eRwOKwcDwDwEZZFIRwO64knntDMmTNVWlpqtrvdbjU0NEiSGhoalJOTc8P2cDisjo4OJScnD7t0BAAYeZYtHx07dkyNjY269957tWLFCknS5s2b9eijj6qsrEy1tbWaNm2aqqqqJEmLFy9WW1ubPB6PEhMTVVlZadVoAIAhWBaFL37xi3rrrbduett/X7PwYTabTU899ZRV4wAAIsArmgEABlEAABhEAQBgEAUAgEEUAAAGUQAAGEQBAGAQBQCAQRQAAAZRAAAYRAEAYBAFAIBBFAAABlEAABhEAQBgEAUAgEEUAAAGUQAAGEQBAGAQBQCAQRQAAAZRAAAYRAEAYBAFAIBBFAAABlEAABhEAQBgEAUAgEEUAACGZVHYunWrXC6Xli9fbrb19vaqtLRUS5cuVWlpqS5evChJCofDeuaZZ+TxeFRQUKAzZ85YNRYAYBiWRaGoqEh79uy5YVt1dbVcLpdaWlrkcrlUXV0tSWpvb1dnZ6daWlq0fft2VVRUWDUWAGAYlkXhvvvuU2pq6g3bWltbVVhYKEkqLCzUwYMHb9hus9mUlZWlvr4+BQIBq0YDAAzBPpoP1tXVpYyMDEnS1KlT1dXVJUny+/1yOp3mfk6nU36/39x3KMFgUF6vN6qZMjMzo9of41e051a0ODcxFCvPzVGNwofZbDbZbLaojpGQkMA3DizDuYWxKtpzc7iojOpfH6Wnp5tloUAgoLS0NEmSw+GQz+cz9/P5fHI4HKM5GgBAoxwFt9uthoYGSVJDQ4NycnJu2B4Oh9XR0aHk5ORPXDoCAIw
8y5aPNm/erKNHj6qnp0eLFi3Shg0b9Oijj6qsrEy1tbWaNm2aqqqqJEmLFy9WW1ubPB6PEhMTVVlZadVYAIBhWBaFXbt23XR7TU3Nx7bZbDY99dRTVo0CAIgQr2gGABhEAQBgEAUAgEEUAAAGUQAAGEQBAGAQBQCAQRQAAAZRAAAYRAEAYBAFAIBBFAAABlEAABhEAQBgEAUAgEEUAAAGUQAAGEQBAGAQBQCAQRQAAAZRAAAYRAEAYBAFAIBBFAAABlEAABhEAQBgEAUAgEEUAAAGUQAAGGMqCu3t7crNzZXH41F1dXWsxwGACWfMRCEUCunpp5/Wnj171NTUpL/85S96++23Yz0WAEwoYyYKJ0+e1Gc+8xnNmDFD8fHxys/PV2tra6zHAoAJxR7rAf7L7/fL6XSazx0Oh06ePDnsPsFgUF6vN+rH/p+H74v6GBhfRuK8GhHF/xvrCTDGjMS5GQwGh7xtzETh08jKyor1CAAwroyZ5SOHwyGfz2c+9/v9cjgcMZwIACaeMROF+fPnq7OzU+fOnVN/f7+amprkdrtjPRYATChjZvnIbrdr27Zt+s53vqNQKKSHHnpIs2fPjvVYADCh2MLhcDjWQwAAxoYxs3wEAIg9ogAAMIgCuLwIxqytW7fK5XJp+fLlsR5lwiAKExyXF8FYVlRUpD179sR6jAmFKExwXF4EY9l9992n1NTUWI8xoRCFCe5mlxfx+/0xnAhALBEFAIBBFCY4Li8C4MOIwgTH5UUAfBivaIba2tpUWVlpLi/ygx/8INYjAZKkzZs36+jRo+rp6VF6ero2bNig4uLiWI81rhEFAIDB8hEAwCAKAACDKAAADKIAADCIAgDAIAqApN/85jfKz89XQUGBVqxYoX/9619RH7O1tXXErjq7YMGCETkO8EnGzNtxArFy4sQJHT58WPX19YqPj1d3d7cGBgYi2ndwcFB2+82/jXJycpSTkzOSowKW45kCJrwLFy5oypQpio+PlySlpaXJ4XDI7Xaru7tbknTq1CmVlJRIkn75y1+qvLxcX//61/X444/ra1/7mv7973+b45WUlOjUqVOqq6vT008/rUuXLmnJkiW6du2aJOnq1atavHixBgYG9O677+qRRx5RUVGR1qxZo7Nnz0qSzp07p9WrV6ugoEC7d+8ezX8OTHBEARPe/fffr/Pnzys3N1cVFRU6evToJ+5z9uxZ/e53v9OuXbuUl5en/fv3S5ICgYACgYDmz59v7pucnKw5c+aY4x4+fFgPPPCA7rzzTj355JN68sknVVdXp5/85Cf62c9+JknasWOHvvGNb+jPf/6zMjIyLPiqgZsjCpjwkpKSzE/1aWlp2rRpk+rq6obdx+1266677pIkPfjgg2pubpYk7d+/X8uWLfvY/fPy8rRv3z5JUlNTk/Ly8nTlyhWdOHFCP/zhD7VixQpt27ZNFy5ckHR9SSs/P1+StGLFihH7WoFPwu8UAElxcXHKzs5Wdna27r33XjU0NCguLk7/vQpMMBi84f6JiYnmY4fDocmTJ+vNN9/U/v37VVFR8bHju91u7d69W729vTpz5owWLlyoDz74QCkpKWpsbLzpTDabbeS+QCBCPFPAhPfOO++os7PTfO71ejVt2jRNnz5dp0+fliS1tLQMe4y8vDzt2bNHly5d0pw5cz52e1JSkubNm6cdO3boy1/+suLi4jRp0iTdc889ZukpHA7rzTfflHT9r42ampokSa+++upIfJlARIgCJryrV69qy5YtysvLU0FBgc6ePavHHntMjz32mCorK1VUVKS4uLhhj5Gbm6t9+/bpwQcfHPI+eXl5evXVV5WXl2e2vfDCC6qtrdVXv/pV5efn6+DBg5KkJ554Qi+//LIKCgp4JzyMKq6SCgAweKYAADCIAgDAIAoAAIMoAAAMogAAMIgCAMAgCgAA4/8ARKHjbY321MIAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_style('whitegrid')\n", + "sns.countplot(x='Survived', data=train)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b0e3d272", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:16.445462Z", + "iopub.status.busy": "2021-08-20T01:43:16.444741Z", + "iopub.status.idle": "2021-08-20T01:43:16.654052Z", + "shell.execute_reply": "2021-08-20T01:43:16.654512Z", + "shell.execute_reply.started": "2021-08-20T00:57:12.982583Z" + }, + "papermill": { + "duration": 0.240764, + "end_time": "2021-08-20T01:43:16.654689", + "exception": false, + "start_time": "2021-08-20T01:43:16.413925", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set_style('whitegrid')\n", + "sns.countplot(x='Survived', hue='Sex', data=train, palette='RdBu_r')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "233a65d0", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:16.707280Z", + "iopub.status.busy": "2021-08-20T01:43:16.706254Z", + "iopub.status.idle": "2021-08-20T01:43:16.937277Z", + "shell.execute_reply": "2021-08-20T01:43:16.936678Z", + "shell.execute_reply.started": "2021-08-20T00:57:13.171869Z" + }, + "papermill": { + "duration": 0.260001, + "end_time": "2021-08-20T01:43:16.937420", + "exception": false, + "start_time": "2021-08-20T01:43:16.677419", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Let's check for passenger class\n", + "sns.set_style('whitegrid')\n", + "sns.countplot(x='Survived', hue='Pclass', data=train, palette='rainbow')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6d76e11d", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:16.990637Z", + "iopub.status.busy": "2021-08-20T01:43:16.989975Z", + "iopub.status.idle": "2021-08-20T01:43:17.344513Z", + "shell.execute_reply": "2021-08-20T01:43:17.343972Z", + "shell.execute_reply.started": "2021-08-20T00:57:13.406433Z" + }, + "papermill": { + "duration": 0.383422, + "end_time": "2021-08-20T01:43:17.344654", + "exception": false, + "start_time": "2021-08-20T01:43:16.961232", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n", + " warnings.warn(msg, FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(train['Age'].dropna(),kde=False,color='darkred',bins=40)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "65e61a96", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:17.400717Z", + "iopub.status.busy": "2021-08-20T01:43:17.400068Z", + "iopub.status.idle": "2021-08-20T01:43:17.750934Z", + "shell.execute_reply": "2021-08-20T01:43:17.751414Z", + "shell.execute_reply.started": "2021-08-20T00:57:13.802719Z" + }, + "papermill": { + "duration": 0.381464, + "end_time": "2021-08-20T01:43:17.751589", + "exception": false, + "start_time": "2021-08-20T01:43:17.370125", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "train['Age'].hist(bins=30, color='darkred', alpha=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "39a6d73e", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:17.810271Z", + "iopub.status.busy": "2021-08-20T01:43:17.809257Z", + "iopub.status.idle": "2021-08-20T01:43:18.147027Z", + "shell.execute_reply": "2021-08-20T01:43:18.146368Z", + "shell.execute_reply.started": "2021-08-20T00:57:14.140942Z" + }, + "papermill": { + "duration": 0.369245, + "end_time": "2021-08-20T01:43:18.147165", + "exception": false, + "start_time": "2021-08-20T01:43:17.777920", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Siblings and Spouse\n", + "sns.countplot(x='SibSp', data=train)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "143974a5", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:18.211096Z", + "iopub.status.busy": "2021-08-20T01:43:18.207564Z", + "iopub.status.idle": "2021-08-20T01:43:18.486968Z", + "shell.execute_reply": "2021-08-20T01:43:18.486293Z", + "shell.execute_reply.started": "2021-08-20T00:57:14.471086Z" + }, + "papermill": { + "duration": 0.312857, + "end_time": "2021-08-20T01:43:18.487118", + "exception": false, + "start_time": "2021-08-20T01:43:18.174261", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x='SibSp', hue='Sex', data=train)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a53f25de", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:18.569605Z", + "iopub.status.busy": "2021-08-20T01:43:18.566457Z", + "iopub.status.idle": "2021-08-20T01:43:18.884065Z", + "shell.execute_reply": "2021-08-20T01:43:18.883385Z", + "shell.execute_reply.started": "2021-08-20T00:57:14.740834Z" + }, + "papermill": { + "duration": 0.368654, + "end_time": "2021-08-20T01:43:18.884213", + "exception": false, + "start_time": "2021-08-20T01:43:18.515559", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "train['Fare'].hist(color='green',bins=40,figsize=(8,4))" + ] + }, + { + "cell_type": "markdown", + "id": "8f93e687", + "metadata": { + "papermill": { + "duration": 0.029983, + "end_time": "2021-08-20T01:43:18.945408", + "exception": false, + "start_time": "2021-08-20T01:43:18.915425", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Data Cleaning\n", + "\n", + "We want to fill in missing age data instead of just dropping the missing age data rows. One way to do this is by filling in the mean age of all the passengers (imputation). However we can be smarter about this and check the average age by passenger class. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "19f65c92", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:19.013901Z", + "iopub.status.busy": "2021-08-20T01:43:19.011313Z", + "iopub.status.idle": "2021-08-20T01:43:19.253768Z", + "shell.execute_reply": "2021-08-20T01:43:19.253256Z", + "shell.execute_reply.started": "2021-08-20T00:57:15.053293Z" + }, + "papermill": { + "duration": 0.278652, + "end_time": "2021-08-20T01:43:19.253927", + "exception": false, + "start_time": "2021-08-20T01:43:18.975275", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(12, 7))\n", + "sns.boxplot(x='Pclass',y='Age',data=train,palette='winter')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f1411c1e", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:19.323535Z", + "iopub.status.busy": "2021-08-20T01:43:19.322500Z", + "iopub.status.idle": "2021-08-20T01:43:19.327981Z", + "shell.execute_reply": "2021-08-20T01:43:19.327333Z", + "shell.execute_reply.started": "2021-08-20T01:34:28.731272Z" + }, + "papermill": { + "duration": 0.043894, + "end_time": "2021-08-20T01:43:19.328131", + "exception": false, + "start_time": "2021-08-20T01:43:19.284237", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pclass\n", + "1 38.233441\n", + "2 29.877630\n", + "3 25.140620\n", + "Name: Age, dtype: float64\n" + ] + } + ], + "source": [ + "# Pclass wise average age\n", + "data = train.copy()\n", + "pclass_avrg = [data.groupby(feature)['Age'].mean() for feature in data if feature == 'Pclass']\n", + "\n", + "print(pclass_avrg[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "57c6e705", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:19.399020Z", + "iopub.status.busy": "2021-08-20T01:43:19.398208Z", + "iopub.status.idle": "2021-08-20T01:43:19.403368Z", + "shell.execute_reply": "2021-08-20T01:43:19.402525Z", + "shell.execute_reply.started": "2021-08-20T01:38:33.015669Z" + }, + "papermill": { + "duration": 0.04158, + "end_time": "2021-08-20T01:43:19.403555", + "exception": false, + "start_time": "2021-08-20T01:43:19.361975", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pclass 1 Average Age: 38\n", + "Pclass 2 Average Age: 29\n", + "Pclass 3 Average Age: 25\n" + ] + } + ], + "source": [ + 
"pclass1_avg_age = int(pclass_avrg[0][1])\n", + "pclass2_avg_age = int(pclass_avrg[0][2])\n", + "pclass3_avg_age = int(pclass_avrg[0][3])\n", + "\n", + "print('Pclass 1 Average Age: ', pclass1_avg_age)\n", + "print('Pclass 2 Average Age: ', pclass2_avg_age)\n", + "print('Pclass 3 Average Age: ', pclass3_avg_age)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a6401dc8", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:19.473120Z", + "iopub.status.busy": "2021-08-20T01:43:19.472207Z", + "iopub.status.idle": "2021-08-20T01:43:19.475211Z", + "shell.execute_reply": "2021-08-20T01:43:19.475651Z", + "shell.execute_reply.started": "2021-08-20T01:38:42.286718Z" + }, + "papermill": { + "duration": 0.040295, + "end_time": "2021-08-20T01:43:19.475823", + "exception": false, + "start_time": "2021-08-20T01:43:19.435528", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def impute_age(cols):\n", + " Age = cols[0]\n", + " Pclass = cols[1]\n", + " \n", + " if pd.isnull(Age):\n", + "\n", + " if Pclass == 1:\n", + " return pclass1_avg_age\n", + "\n", + " elif Pclass == 2:\n", + " return pclass2_avg_age\n", + "\n", + " else:\n", + " return pclass3_avg_age\n", + "\n", + " else:\n", + " return Age" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "3aa934ea", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:19.544003Z", + "iopub.status.busy": "2021-08-20T01:43:19.543014Z", + "iopub.status.idle": "2021-08-20T01:43:19.563143Z", + "shell.execute_reply": "2021-08-20T01:43:19.562550Z", + "shell.execute_reply.started": "2021-08-20T01:39:58.870020Z" + }, + "papermill": { + "duration": 0.056559, + "end_time": "2021-08-20T01:43:19.563279", + "exception": false, + "start_time": "2021-08-20T01:43:19.506720", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Impute age for null values\n", + "train['Age'] = 
train[['Age','Pclass']].apply(impute_age,axis=1)" + ] + }, + { + "cell_type": "markdown", + "id": "a36ee30d", + "metadata": { + "papermill": { + "duration": 0.03111, + "end_time": "2021-08-20T01:43:19.625169", + "exception": false, + "start_time": "2021-08-20T01:43:19.594059", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Now let's check that heat map again!" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "8f795339", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:19.710538Z", + "iopub.status.busy": "2021-08-20T01:43:19.709412Z", + "iopub.status.idle": "2021-08-20T01:43:19.950468Z", + "shell.execute_reply": "2021-08-20T01:43:19.949811Z", + "shell.execute_reply.started": "2021-08-20T01:40:37.399955Z" + }, + "papermill": { + "duration": 0.29485, + "end_time": "2021-08-20T01:43:19.950604", + "exception": false, + "start_time": "2021-08-20T01:43:19.655754", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(train.isnull(),yticklabels=False,cbar=False,cmap='viridis')" + ] + }, + { + "cell_type": "markdown", + "id": "577c64ae", + "metadata": { + "papermill": { + "duration": 0.033257, + "end_time": "2021-08-20T01:43:20.016237", + "exception": false, + "start_time": "2021-08-20T01:43:19.982980", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "The Cabin column has a lot of missing values, so we are going to drop it." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5bbdf275", + "metadata": { + "execution": { + "iopub.execute_input": "2021-08-20T01:43:20.098710Z", + "iopub.status.busy": "2021-08-20T01:43:20.085507Z", + "iopub.status.idle": "2021-08-20T01:43:20.103610Z", + "shell.execute_reply": "2021-08-20T01:43:20.103113Z", + "shell.execute_reply.started": "2021-08-20T01:42:19.878524Z" + }, + "papermill": { + "duration": 0.055447, + "end_time": "2021-08-20T01:43:20.103747", + "exception": false, + "start_time": "2021-08-20T01:43:20.048300", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500S
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250S
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000S
4503Allen, Mr. William Henrymale35.0003734508.0500S
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Embarked \n", + "0 0 A/5 21171 7.2500 S \n", + "1 0 PC 17599 71.2833 C \n", + "2 0 STON/O2. 3101282 7.9250 S \n", + "3 0 113803 53.1000 S \n", + "4 0 373450 8.0500 S " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.drop('Cabin',axis=1,inplace=True)\n", + "train.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "papermill": { + "default_parameters": {}, + "duration": 14.837901, + "end_time": "2021-08-20T01:43:21.606786", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2021-08-20T01:43:06.768885", + "version": "2.3.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}