diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..feaca95 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.ipynb_checkpoints +.vscode diff --git a/Customer_Segmentation_with_RFM analysis/notebook/Customer_segentation_with_RFM-MK.ipynb b/Customer_Segmentation_with_RFM analysis/notebook/Customer_segentation_with_RFM-MK.ipynb new file mode 100644 index 0000000..2171bd1 --- /dev/null +++ b/Customer_Segmentation_with_RFM analysis/notebook/Customer_segentation_with_RFM-MK.ipynb @@ -0,0 +1,2361 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Customer Segmentation using RFM analysis\n", + "\n", + "This is a transnational data set which contains all the transactions occurring between 01/12/2010 and 09/12/2011 for a UK-based and registered non-store online retail. The company mainly sells unique all-occasion gifts. Many customers of the company are wholesalers.\n", + "\n", + "We will create customer segments as per the Recency, Frequency and Monetary analysis by analyzing the data to know our customer base. This knowledge can then be used to target customers to retain customers, pitch offers, etc." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Source \n", + "UCI ML Repo - [Online Retail Data Set](https://archive.ics.uci.edu/ml/datasets/online+retail)\n", + "\n", + "\n", + "## Attribute Information:\n", + "|Column|Description|Type|\n", + "|---|---|---|\n", + "|InvoiceNo| Invoice number.| Nominal, a 6-digit integral number uniquely assigned to each transaction. If this code starts with letter 'c', it indicates a cancellation.|\n", + "|StockCode| Product (item) code. |Nominal, a 5-digit integral number uniquely assigned to each distinct product.|\n", + "|Description| Product (item) name.| Nominal.|\n", + "|Quantity| The quantities of each product (item) per transaction.| Numeric.|\n", + "|InvoiceDate| Invoice date and time. 
|Numeric, the day and time when each transaction was generated.|\n", + "|UnitPrice| Unit price.| Numeric, Product price per unit in sterling.|\n", + "|CustomerID| Customer number.| Nominal, a 5-digit integral number uniquely assigned to each customer.|\n", + "|Country| Country name.| Nominal, the name of the country where each customer resides.|\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "\n", + "import time, warnings\n", + "import datetime as dt\n", + "\n", + "#visualizations\n", + "import matplotlib.pyplot as plt\n", + "from pandas.plotting import scatter_matrix\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read the data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountry
054522021955DOORMAT UNION JACK GUNS AND ROSES23/1/2011 8:307.9514620.0United Kingdom
154522048194DOORMAT HEARTS23/1/2011 8:307.9514620.0United Kingdom
254522022556PLASTERS IN TIN CIRCUS PARADE123/1/2011 8:301.6514620.0United Kingdom
354522022139RETROSPOT TEA SET CERAMIC 11 PC33/1/2011 8:304.9514620.0United Kingdom
454522084029GKNITTED UNION FLAG HOT WATER BOTTLE43/1/2011 8:303.7514620.0United Kingdom
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "0 545220 21955 DOORMAT UNION JACK GUNS AND ROSES 2 \n", + "1 545220 48194 DOORMAT HEARTS 2 \n", + "2 545220 22556 PLASTERS IN TIN CIRCUS PARADE 12 \n", + "3 545220 22139 RETROSPOT TEA SET CERAMIC 11 PC 3 \n", + "4 545220 84029G KNITTED UNION FLAG HOT WATER BOTTLE 4 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country \n", + "0 3/1/2011 8:30 7.95 14620.0 United Kingdom \n", + "1 3/1/2011 8:30 7.95 14620.0 United Kingdom \n", + "2 3/1/2011 8:30 1.65 14620.0 United Kingdom \n", + "3 3/1/2011 8:30 4.95 14620.0 United Kingdom \n", + "4 3/1/2011 8:30 3.75 14620.0 United Kingdom " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('../data/commercial_data.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Checking for cancelled orders" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountry
173363A563185BAdjust bad debt18/12/2011 14:5011062.06NaNUnited Kingdom
173364A563186BAdjust bad debt18/12/2011 14:51-11062.06NaNUnited Kingdom
173365A563187BAdjust bad debt18/12/2011 14:52-11062.06NaNUnited Kingdom
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity InvoiceDate \\\n", + "173363 A563185 B Adjust bad debt 1 8/12/2011 14:50 \n", + "173364 A563186 B Adjust bad debt 1 8/12/2011 14:51 \n", + "173365 A563187 B Adjust bad debt 1 8/12/2011 14:52 \n", + "\n", + " UnitPrice CustomerID Country \n", + "173363 11062.06 NaN United Kingdom \n", + "173364 -11062.06 NaN United Kingdom \n", + "173365 -11062.06 NaN United Kingdom " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.InvoiceNo.str.contains('\\D').replace(pd.NA,False)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "no cancel orders, but some bad debt corrections, however there seem to be missing customer ID.." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountry
7354523020960WATERMELON BATH SPONGE13/1/2011 9:552.46NaNUnited Kingdom
7454523021082SET/20 FRUIT SALAD PAPER NAPKINS13/1/2011 9:551.63NaNUnited Kingdom
7554523021488RED WHITE SCARF HOT WATER BOTTLE13/1/2011 9:558.29NaNUnited Kingdom
7654523035970ZINC FOLKART SLEIGH BELLS13/1/2011 9:554.13NaNUnited Kingdom
7754523082583HOT BATHS METAL SIGN13/1/2011 9:554.13NaNUnited Kingdom
7854523082583HOT BATHS METAL SIGN73/1/2011 9:554.96NaNUnited Kingdom
33854529921730GLASS STAR FROSTED T-LIGHT HOLDER13/1/2011 12:194.95NaNUnited Kingdom
54054531582482WOODEN PICTURE FRAME WHITE FINISH23/1/2011 14:144.96NaNUnited Kingdom
54154531582600NO SINGING METAL SIGN13/1/2011 14:144.13NaNUnited Kingdom
54254531584969BOX OF 6 ASSORTED COLOUR TEASPOONS13/1/2011 14:148.29NaNUnited Kingdom
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "73 545230 20960 WATERMELON BATH SPONGE 1 \n", + "74 545230 21082 SET/20 FRUIT SALAD PAPER NAPKINS 1 \n", + "75 545230 21488 RED WHITE SCARF HOT WATER BOTTLE 1 \n", + "76 545230 35970 ZINC FOLKART SLEIGH BELLS 1 \n", + "77 545230 82583 HOT BATHS METAL SIGN 1 \n", + "78 545230 82583 HOT BATHS METAL SIGN 7 \n", + "338 545299 21730 GLASS STAR FROSTED T-LIGHT HOLDER 1 \n", + "540 545315 82482 WOODEN PICTURE FRAME WHITE FINISH 2 \n", + "541 545315 82600 NO SINGING METAL SIGN 1 \n", + "542 545315 84969 BOX OF 6 ASSORTED COLOUR TEASPOONS 1 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country \n", + "73 3/1/2011 9:55 2.46 NaN United Kingdom \n", + "74 3/1/2011 9:55 1.63 NaN United Kingdom \n", + "75 3/1/2011 9:55 8.29 NaN United Kingdom \n", + "76 3/1/2011 9:55 4.13 NaN United Kingdom \n", + "77 3/1/2011 9:55 4.13 NaN United Kingdom \n", + "78 3/1/2011 9:55 4.96 NaN United Kingdom \n", + "338 3/1/2011 12:19 4.95 NaN United Kingdom \n", + "540 3/1/2011 14:14 4.96 NaN United Kingdom \n", + "541 3/1/2011 14:14 4.13 NaN United Kingdom \n", + "542 3/1/2011 14:14 8.29 NaN United Kingdom " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.CustomerID.isna()].head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "59942" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.CustomerID.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Why missing CustID?\n", + "- possibly guest checkout feature on the website\n", + "\n", + "\n", + "What could be possible features that you would collect if you want to segment \"guest\" customers?\n", + " - browser, IP, location, cookie" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 
Remove rows where CustomerID is NA" ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(176137, 8)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.dropna(subset=['CustomerID'], inplace=True)\n", + "data.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## RFM Analysis\n", + "RFM (Recency, Frequency, Monetary) analysis is a customer segmentation technique that uses past purchase behavior to divide customers into groups. RFM helps divide customers into various categories or clusters to identify customers who are more likely to respond to promotions and also for future personalization services.\n", + "\n", + "**RECENCY (R)**: Days since last purchase\n", + "\n", + "**FREQUENCY (F):** Total number of purchases\n", + "\n", + "**MONETARY VALUE (M):** Total money this customer spent.\n", + "\n", + "We will create those 3 customer attributes for each customer." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Recency\n", + "To calculate recency, we need to choose a reference date from which we evaluate how many days ago the customer's last purchase was." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find out the latest date in the data to use it as the reference" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'9/9/2011 9:52'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.InvoiceDate.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2011-12-09\n" + ] + } + ], + "source": [ + "now = dt.date(2011, 12, 9)\n", + "print(now)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a new column called date which contains only the invoice date" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountrydate
054522021955DOORMAT UNION JACK GUNS AND ROSES23/1/2011 8:307.9514620.0United Kingdom2011-03-01
154522048194DOORMAT HEARTS23/1/2011 8:307.9514620.0United Kingdom2011-03-01
254522022556PLASTERS IN TIN CIRCUS PARADE123/1/2011 8:301.6514620.0United Kingdom2011-03-01
354522022139RETROSPOT TEA SET CERAMIC 11 PC33/1/2011 8:304.9514620.0United Kingdom2011-03-01
454522084029GKNITTED UNION FLAG HOT WATER BOTTLE43/1/2011 8:303.7514620.0United Kingdom2011-03-01
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "0 545220 21955 DOORMAT UNION JACK GUNS AND ROSES 2 \n", + "1 545220 48194 DOORMAT HEARTS 2 \n", + "2 545220 22556 PLASTERS IN TIN CIRCUS PARADE 12 \n", + "3 545220 22139 RETROSPOT TEA SET CERAMIC 11 PC 3 \n", + "4 545220 84029G KNITTED UNION FLAG HOT WATER BOTTLE 4 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country date \n", + "0 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 \n", + "1 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 \n", + "2 3/1/2011 8:30 1.65 14620.0 United Kingdom 2011-03-01 \n", + "3 3/1/2011 8:30 4.95 14620.0 United Kingdom 2011-03-01 \n", + "4 3/1/2011 8:30 3.75 14620.0 United Kingdom 2011-03-01 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['date'] = pd.DatetimeIndex(data.InvoiceDate).date\n", + "\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check the last date of purchase with respect to CustomerID and calculate the RECENCY" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerIDLastPurchaseDate
012747.02011-08-22
112748.02011-09-30
212749.02011-08-01
312820.02011-09-26
412821.02011-05-09
\n", + "
" + ], + "text/plain": [ + " CustomerID LastPurchaseDate\n", + "0 12747.0 2011-08-22\n", + "1 12748.0 2011-09-30\n", + "2 12749.0 2011-08-01\n", + "3 12820.0 2011-09-26\n", + "4 12821.0 2011-05-09" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recency_df = data.groupby('CustomerID', as_index=False).date.max()\n", + "recency_df.columns = ['CustomerID', 'LastPurchaseDate']\n", + "recency_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerIDLastPurchaseDateRecency
012747.02011-08-22109
112748.02011-09-3070
212749.02011-08-01130
312820.02011-09-2674
412821.02011-05-09214
\n", + "
" + ], + "text/plain": [ + " CustomerID LastPurchaseDate Recency\n", + "0 12747.0 2011-08-22 109\n", + "1 12748.0 2011-09-30 70\n", + "2 12749.0 2011-08-01 130\n", + "3 12820.0 2011-09-26 74\n", + "4 12821.0 2011-05-09 214" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recency_df['Recency'] = recency_df.LastPurchaseDate.apply(lambda x: (now-x).days)\n", + "recency_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Frequency\n", + "Frequency helps us to know how many times a customer purchased from us. To do that we need to check how many invoices are registered by the same customer." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Drop duplicate data from the data" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountrydate
054522021955DOORMAT UNION JACK GUNS AND ROSES23/1/2011 8:307.9514620.0United Kingdom2011-03-01
1554522122021BLUE FELT EASTER EGG BASKET63/1/2011 8:351.6514740.0United Kingdom2011-03-01
4554522222957SET 3 PAPER VINTAGE CHICK PAPER EGG63/1/2011 8:492.9513880.0United Kingdom2011-03-01
5454522322487WHITE WOOD GARDEN PLANT LADDER43/1/2011 8:588.5016462.0United Kingdom2011-03-01
5554522422664TOY TIDY DOLLY GIRL DESIGN53/1/2011 9:032.1017068.0United Kingdom2011-03-01
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "0 545220 21955 DOORMAT UNION JACK GUNS AND ROSES 2 \n", + "15 545221 22021 BLUE FELT EASTER EGG BASKET 6 \n", + "45 545222 22957 SET 3 PAPER VINTAGE CHICK PAPER EGG 6 \n", + "54 545223 22487 WHITE WOOD GARDEN PLANT LADDER 4 \n", + "55 545224 22664 TOY TIDY DOLLY GIRL DESIGN 5 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country date \n", + "0 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 \n", + "15 3/1/2011 8:35 1.65 14740.0 United Kingdom 2011-03-01 \n", + "45 3/1/2011 8:49 2.95 13880.0 United Kingdom 2011-03-01 \n", + "54 3/1/2011 8:58 8.50 16462.0 United Kingdom 2011-03-01 \n", + "55 3/1/2011 9:03 2.10 17068.0 United Kingdom 2011-03-01 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "invoice_level_data = data.copy()\n", + "invoice_level_data.drop_duplicates(subset=['InvoiceNo', 'CustomerID'], keep='first', inplace=True)\n", + "invoice_level_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculate the frequency of purchases" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerIDFrequency
012747.05
112748.096
212749.03
312820.01
412821.01
\n", + "
" + ], + "text/plain": [ + " CustomerID Frequency\n", + "0 12747.0 5\n", + "1 12748.0 96\n", + "2 12749.0 3\n", + "3 12820.0 1\n", + "4 12821.0 1" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frequency_df = invoice_level_data.groupby('CustomerID', as_index=False).InvoiceNo.count()\n", + "frequency_df.columns = ['CustomerID', 'Frequency']\n", + "frequency_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Monetary\n", + "\n", + "**Monetary attribute answers the question: How much money did the customer spend over time?**\n", + "\n", + "### To do that, first, we will create a new column `TotalCost` holding the total price of each invoice line." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "data['TotalCost'] = data.Quantity * data.UnitPrice" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountrydateTotalCost
054522021955DOORMAT UNION JACK GUNS AND ROSES23/1/2011 8:307.9514620.0United Kingdom2011-03-0115.90
154522048194DOORMAT HEARTS23/1/2011 8:307.9514620.0United Kingdom2011-03-0115.90
254522022556PLASTERS IN TIN CIRCUS PARADE123/1/2011 8:301.6514620.0United Kingdom2011-03-0119.80
354522022139RETROSPOT TEA SET CERAMIC 11 PC33/1/2011 8:304.9514620.0United Kingdom2011-03-0114.85
454522084029GKNITTED UNION FLAG HOT WATER BOTTLE43/1/2011 8:303.7514620.0United Kingdom2011-03-0115.00
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "0 545220 21955 DOORMAT UNION JACK GUNS AND ROSES 2 \n", + "1 545220 48194 DOORMAT HEARTS 2 \n", + "2 545220 22556 PLASTERS IN TIN CIRCUS PARADE 12 \n", + "3 545220 22139 RETROSPOT TEA SET CERAMIC 11 PC 3 \n", + "4 545220 84029G KNITTED UNION FLAG HOT WATER BOTTLE 4 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country date TotalCost \n", + "0 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 15.90 \n", + "1 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 15.90 \n", + "2 3/1/2011 8:30 1.65 14620.0 United Kingdom 2011-03-01 19.80 \n", + "3 3/1/2011 8:30 4.95 14620.0 United Kingdom 2011-03-01 14.85 \n", + "4 3/1/2011 8:30 3.75 14620.0 United Kingdom 2011-03-01 15.00 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerIDMonetary
012747.01760.09
112748.014680.85
212749.02755.23
312820.0217.77
412821.092.72
\n", + "
" + ], + "text/plain": [ + " CustomerID Monetary\n", + "0 12747.0 1760.09\n", + "1 12748.0 14680.85\n", + "2 12749.0 2755.23\n", + "3 12820.0 217.77\n", + "4 12821.0 92.72" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "monetary_df = data.groupby('CustomerID', as_index=False).TotalCost.sum()\n", + "monetary_df.columns = ['CustomerID', 'Monetary']\n", + "monetary_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create RFM Table" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LastPurchaseDateRecencyFrequencyMonetary
CustomerID
12747.02011-08-2210951760.09
12748.02011-09-30709614680.85
12749.02011-08-0113032755.23
12820.02011-09-26741217.77
12821.02011-05-09214192.72
\n", + "
" + ], + "text/plain": [ + " LastPurchaseDate Recency Frequency Monetary\n", + "CustomerID \n", + "12747.0 2011-08-22 109 5 1760.09\n", + "12748.0 2011-09-30 70 96 14680.85\n", + "12749.0 2011-08-01 130 3 2755.23\n", + "12820.0 2011-09-26 74 1 217.77\n", + "12821.0 2011-05-09 214 1 92.72" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df = (recency_df\n", + " .merge(frequency_df, on='CustomerID')\n", + " .merge(monetary_df, on='CustomerID')\n", + ")\n", + "\n", + "rfm_df.set_index('CustomerID', inplace=True)\n", + "rfm_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Customer segments with RFM Model\n", + "\n", + "**The simplest way to create customer segments from the RFM model is to use quartiles. We assign a score from 1 to 4 to Recency, Frequency and Monetary. Four is the best/highest value, and one is the lowest/worst value. A final RFM score is calculated simply by combining individual RFM score numbers.**\n", + "\n", + "Note: Quintiles (score from 1-5) offer better granularity, in case the business needs that, but it will be more challenging to create segments since we will have 125 possible combinations (111 through 555). So, we will use quartiles." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find RFM quartiles" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RecencyFrequencyMonetary
0.0070.01.02.9000
0.2585.01.0258.0775
0.50119.02.0518.3500
0.75183.03.01182.9725
1.00283.096.0141789.3200
\n", + "
" + ], + "text/plain": [ + " Recency Frequency Monetary\n", + "0.00 70.0 1.0 2.9000\n", + "0.25 85.0 1.0 258.0775\n", + "0.50 119.0 2.0 518.3500\n", + "0.75 183.0 3.0 1182.9725\n", + "1.00 283.0 96.0 141789.3200" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantiles = rfm_df.quantile(q=[0,0.25,0.5,0.75,1])\n", + "quantiles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since the quartile bin edges for the `Frequency` column contain duplicates (the 0% and 25% quantiles are both 1), we define custom quantile cut points for it" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 1.0\n", + "0.5 2.0\n", + "0.7 3.0\n", + "0.8 4.0\n", + "1.0 96.0\n", + "Name: Frequency, dtype: float64" + ] + }, + "execution_count": 164, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.Frequency.quantile(q=[0,0.5,0.7,0.8,1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creation of RFM Segments\n", + "\n", + "We will create two segmentation classes, since high recency is bad while high frequency and monetary value are good.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create functions as per the appropriate quartile values and apply them to create segments" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CustomerID\n", + "12747.0 3\n", + "12748.0 4\n", + "12749.0 2\n", + "12820.0 4\n", + "12821.0 1\n", + "Name: Recency, dtype: category\n", + "Categories (4, int64): [4 < 3 < 2 < 1]" + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_labels = range(4,0,-1)\n", + "r_groups = pd.qcut(rfm_df.Recency, q=4, labels=r_labels)\n", + "r_groups.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "CustomerID\n", + "12747.0 4\n", + "12748.0 4\n", + "12749.0 4\n", + "12820.0 1\n", + "12821.0 1\n", + "Name: Monetary, dtype: category\n", + "Categories (4, int64): [1 < 2 < 3 < 4]" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m_labels = range(1,5)\n", + "m_groups = pd.qcut(rfm_df.Monetary, q=4, labels=m_labels)\n", + "m_groups.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CustomerID\n", + "12747.0 4\n", + "12748.0 4\n", + "12749.0 2\n", + "12820.0 1\n", + "12821.0 1\n", + "Name: Frequency, dtype: category\n", + "Categories (4, int64): [1 < 2 < 3 < 4]" + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f_labels = range(1,5)\n", + "f_groups = pd.qcut(rfm_df.Frequency, q=[0,0.5,0.7,0.8,1], labels=f_labels)\n", + "\n", + "f_groups.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Now that we have the score of each customer, we can represent our customer segmentation, combine the scores (R_Quartile, F_Quartile,M_Quartile) together." + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [], + "source": [ + "rfm_df = rfm_df.assign(R=r_groups,F=f_groups,M=m_groups)" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LastPurchaseDateRecencyFrequencyMonetaryRFM
CustomerID
12747.02011-08-2210951760.09344
12748.02011-09-30709614680.85444
12749.02011-08-0113032755.23224
12820.02011-09-26741217.77411
12821.02011-05-09214192.72111
........................
18280.02011-03-072771180.60111
18281.02011-06-12180180.82211
18282.02011-08-051261100.21211
18283.02011-09-05958802.77343
18287.02011-05-222011765.28113
\n", + "

2864 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " LastPurchaseDate Recency Frequency Monetary R F M\n", + "CustomerID \n", + "12747.0 2011-08-22 109 5 1760.09 3 4 4\n", + "12748.0 2011-09-30 70 96 14680.85 4 4 4\n", + "12749.0 2011-08-01 130 3 2755.23 2 2 4\n", + "12820.0 2011-09-26 74 1 217.77 4 1 1\n", + "12821.0 2011-05-09 214 1 92.72 1 1 1\n", + "... ... ... ... ... .. .. ..\n", + "18280.0 2011-03-07 277 1 180.60 1 1 1\n", + "18281.0 2011-06-12 180 1 80.82 2 1 1\n", + "18282.0 2011-08-05 126 1 100.21 2 1 1\n", + "18283.0 2011-09-05 95 8 802.77 3 4 3\n", + "18287.0 2011-05-22 201 1 765.28 1 1 3\n", + "\n", + "[2864 rows x 7 columns]" + ] + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LastPurchaseDateRecencyFrequencyMonetaryRFMRFM_segmentRFM_score
CustomerID
12747.02011-08-2210951760.0934434411.0
12748.02011-09-30709614680.8544444412.0
12749.02011-08-0113032755.232242248.0
12820.02011-09-26741217.774114116.0
12821.02011-05-09214192.721111113.0
\n", + "
" + ], + "text/plain": [ + " LastPurchaseDate Recency Frequency Monetary R F M \\\n", + "CustomerID \n", + "12747.0 2011-08-22 109 5 1760.09 3 4 4 \n", + "12748.0 2011-09-30 70 96 14680.85 4 4 4 \n", + "12749.0 2011-08-01 130 3 2755.23 2 2 4 \n", + "12820.0 2011-09-26 74 1 217.77 4 1 1 \n", + "12821.0 2011-05-09 214 1 92.72 1 1 1 \n", + "\n", + " RFM_segment RFM_score \n", + "CustomerID \n", + "12747.0 344 11.0 \n", + "12748.0 444 12.0 \n", + "12749.0 224 8.0 \n", + "12820.0 411 6.0 \n", + "12821.0 111 3.0 " + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df['RFM_segment'] = rfm_df.apply(lambda x : '{}{}{}'.format(x.R , x.F, x.M), axis=1)\n", + "rfm_df['RFM_score'] = rfm_df.loc[:,['R','F','M']].sum(axis=1)\n", + "rfm_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find out the best customers" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([12748., 12839., 12901., 12910., 12921., 12957., 12971., 13004.,\n", + " 13014., 13018., 13021., 13078., 13089., 13094., 13097., 13102.,\n", + " 13178., 13263., 13266., 13268., 13384., 13394., 13408., 13418.,\n", + " 13468., 13488., 13576., 13599., 13630., 13694., 13695., 13704.,\n", + " 13767., 13777., 13798., 13842., 13862., 13881., 13985., 14004.,\n", + " 14031., 14056., 14060., 14062., 14096., 14159., 14191., 14194.,\n", + " 14221., 14227., 14235., 14239., 14282., 14298., 14367., 14395.,\n", + " 14401., 14456., 14462., 14524., 14527., 14543., 14562., 14606.,\n", + " 14659., 14667., 14676., 14680., 14688., 14709., 14735., 14755.,\n", + " 14769., 14800., 14808., 14854., 14868., 14944., 14952., 14961.,\n", + " 15005., 15039., 15044., 15061., 15078., 15114., 15140., 15144.,\n", + " 15150., 15152., 15187., 15194., 15218., 15241., 15290., 15301.,\n", + " 15311., 15356., 15358., 15410., 15465., 15498., 15547., 15555.,\n", + " 15640., 
15674., 15796., 15804., 15827., 15838., 15867., 15955.,\n", + " 15981., 15984., 16011., 16013., 16029., 16033., 16076., 16103.,\n", + " 16133., 16145., 16156., 16161., 16168., 16187., 16326., 16407.,\n", + " 16422., 16458., 16523., 16525., 16558., 16607., 16626., 16656.,\n", + " 16672., 16681., 16700., 16705., 16709., 16710., 16713., 16729.,\n", + " 16746., 16779., 16813., 16818., 16839., 16928., 16931., 16945.,\n", + " 17017., 17049., 17061., 17068., 17069., 17220., 17238., 17243.,\n", + " 17306., 17315., 17340., 17377., 17389., 17402., 17416., 17428.,\n", + " 17450., 17491., 17511., 17576., 17581., 17611., 17613., 17644.,\n", + " 17651., 17652., 17656., 17669., 17675., 17677., 17686., 17716.,\n", + " 17719., 17725., 17730., 17750., 17757., 17758., 17811., 17841.,\n", + " 17857., 17858., 17865., 17920., 17949., 17997., 18008., 18041.,\n", + " 18094., 18102., 18109., 18118., 18144., 18172., 18198., 18225.,\n", + " 18226., 18229., 18241.])" + ] + }, + "execution_count": 209, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.loc[rfm_df.RFM_segment == '444'].index.values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learner Activity\n", + "\n", + "**1. Find the following:**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Best Customer \n", + "\n", + "- See above filter RFM_seg = 444" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. 
Loyal Customer \n", + "\n", + "- we are treating our most frequent customer as our loyal customer, hence filter RFM_seg = x4x" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "444 211\n", + "344 155\n", + "244 32\n", + "343 30\n", + "443 26\n", + "243 12\n", + "143 5\n", + "144 1\n", + "242 1\n", + "442 1\n", + "Name: RFM_segment, dtype: int64" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.loc[rfm_df.F == 4,'RFM_segment'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3. Big Spenders \n", + "- since Monetary value distribution is skewed, we might consider our big spender to be filtered by RFM_seg = xx3+" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "444 253\n", + "344 214\n", + "233 109\n", + "333 103\n", + "433 85\n", + "343 63\n", + "244 60\n", + "443 44\n", + "243 41\n", + "133 38\n", + "234 38\n", + "323 37\n", + "423 37\n", + "434 33\n", + "334 32\n", + "113 31\n", + "223 30\n", + "123 29\n", + "413 25\n", + "213 23\n", + "424 15\n", + "224 14\n", + "313 14\n", + "324 13\n", + "124 11\n", + "114 8\n", + "143 7\n", + "134 7\n", + "414 6\n", + "214 5\n", + "144 4\n", + "314 3\n", + "Name: RFM_segment, dtype: int64" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.loc[rfm_df.M >= 3,'RFM_segment'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "4. 
Almost lost customers \n", + "- RFM_seg = <=2 <=2 <=2 \n", + "- or if you have periodic data, like for every financial quarter FY Q1, FY Q2, FY Q3 check the customer trend" + ] + }, + { + "cell_type": "code", + "execution_count": 202, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "111 333\n", + "112 234\n", + "211 192\n", + "212 163\n", + "222 22\n", + "122 8\n", + "121 5\n", + "221 3\n", + "Name: RFM_segment, dtype: int64" + ] + }, + "execution_count": 202, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.query('R<=2 and F<=2 and M<=2').RFM_segment.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "5. Lost customers \n", + "- customers with the lowest score RFM_seg = 111 " + ] + }, + { + "cell_type": "code", + "execution_count": 203, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "111 333\n", + "Name: RFM_segment, dtype: int64" + ] + }, + "execution_count": 203, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.loc[rfm_df.RFM_segment=='111','RFM_segment'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**2. 
Now that we know our customer segments, how will you target them?**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- best -> make them feel valued: send vouchers on birthdays and anniversaries\n", + "- loyal -> upsell, cross-sell\n", + "- almost lost / lost -> discounts" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Customer_Segmentation_with_RFM analysis/notebook/Customer_segentation_with_RFM.ipynb b/Customer_Segmentation_with_RFM analysis/notebook/Customer_segentation_with_RFM.ipynb index 90e8e35..2171bd1 100644 --- a/Customer_Segmentation_with_RFM analysis/notebook/Customer_segentation_with_RFM.ipynb +++ b/Customer_Segmentation_with_RFM analysis/notebook/Customer_segentation_with_RFM.ipynb @@ -11,6 +11,27 @@ "We will create cutomer segments as per the Recency,Frequency and Monetary analysis by analyzing the data to know our customer base. This knowlwdge can then be used to target customers to retain customers, pitch offers etc" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Source \n", + "UCI ML Repo - [Online Retail Data Set](https://archive.ics.uci.edu/ml/datasets/online+retail)\n", + "\n", + "\n", + "## Attribute Information:\n", + "|Column|Description|Type|\n", + "|---|---|---|\n", + "|InvoiceNo| Invoice number.| Nominal, a 6-digit integral number uniquely assigned to each transaction. If this code starts with the letter 'c', it indicates a cancellation.|\n", + "|StockCode| Product (item) code. 
|Nominal, a 5-digit integral number uniquely assigned to each distinct product.|\n", + "|Description| Product (item) name.| Nominal.|\n", + "|Quantity| The quantities of each product (item) per transaction.| Numeric.|\n", + "|InvoiceDate| Invoice date and time. |Numeric, the day and time when each transaction was generated.|\n", + "|UnitPrice| Unit price.| Numeric, Product price per unit in sterling.|\n", + "|CustomerID| Customer number.| Nominal, a 5-digit integral number uniquely assigned to each customer.|\n", + "|Country| Country name.| Nominal, the name of the country where each customer resides.|\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -20,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -40,26 +61,461 @@ "warnings.filterwarnings(\"ignore\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read the data" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "<div>
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountry
054522021955DOORMAT UNION JACK GUNS AND ROSES23/1/2011 8:307.9514620.0United Kingdom
154522048194DOORMAT HEARTS23/1/2011 8:307.9514620.0United Kingdom
254522022556PLASTERS IN TIN CIRCUS PARADE123/1/2011 8:301.6514620.0United Kingdom
354522022139RETROSPOT TEA SET CERAMIC 11 PC33/1/2011 8:304.9514620.0United Kingdom
454522084029GKNITTED UNION FLAG HOT WATER BOTTLE43/1/2011 8:303.7514620.0United Kingdom
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "0 545220 21955 DOORMAT UNION JACK GUNS AND ROSES 2 \n", + "1 545220 48194 DOORMAT HEARTS 2 \n", + "2 545220 22556 PLASTERS IN TIN CIRCUS PARADE 12 \n", + "3 545220 22139 RETROSPOT TEA SET CERAMIC 11 PC 3 \n", + "4 545220 84029G KNITTED UNION FLAG HOT WATER BOTTLE 4 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country \n", + "0 3/1/2011 8:30 7.95 14620.0 United Kingdom \n", + "1 3/1/2011 8:30 7.95 14620.0 United Kingdom \n", + "2 3/1/2011 8:30 1.65 14620.0 United Kingdom \n", + "3 3/1/2011 8:30 4.95 14620.0 United Kingdom \n", + "4 3/1/2011 8:30 3.75 14620.0 United Kingdom " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('../data/commercial_data.csv')\n", + "data.head()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Read the data" + "### Checking for cancelled orders" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountry
173363A563185BAdjust bad debt18/12/2011 14:5011062.06NaNUnited Kingdom
173364A563186BAdjust bad debt18/12/2011 14:51-11062.06NaNUnited Kingdom
173365A563187BAdjust bad debt18/12/2011 14:52-11062.06NaNUnited Kingdom
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity InvoiceDate \\\n", + "173363 A563185 B Adjust bad debt 1 8/12/2011 14:50 \n", + "173364 A563186 B Adjust bad debt 1 8/12/2011 14:51 \n", + "173365 A563187 B Adjust bad debt 1 8/12/2011 14:52 \n", + "\n", + " UnitPrice CustomerID Country \n", + "173363 11062.06 NaN United Kingdom \n", + "173364 -11062.06 NaN United Kingdom \n", + "173365 -11062.06 NaN United Kingdom " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.InvoiceNo.str.contains('\\D').replace(pd.NA,False)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "no cancel orders, but some bad debt corrections, however there seem to be missing customer ID.." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountry
7354523020960WATERMELON BATH SPONGE13/1/2011 9:552.46NaNUnited Kingdom
7454523021082SET/20 FRUIT SALAD PAPER NAPKINS13/1/2011 9:551.63NaNUnited Kingdom
7554523021488RED WHITE SCARF HOT WATER BOTTLE13/1/2011 9:558.29NaNUnited Kingdom
7654523035970ZINC FOLKART SLEIGH BELLS13/1/2011 9:554.13NaNUnited Kingdom
7754523082583HOT BATHS METAL SIGN13/1/2011 9:554.13NaNUnited Kingdom
7854523082583HOT BATHS METAL SIGN73/1/2011 9:554.96NaNUnited Kingdom
33854529921730GLASS STAR FROSTED T-LIGHT HOLDER13/1/2011 12:194.95NaNUnited Kingdom
54054531582482WOODEN PICTURE FRAME WHITE FINISH23/1/2011 14:144.96NaNUnited Kingdom
54154531582600NO SINGING METAL SIGN13/1/2011 14:144.13NaNUnited Kingdom
54254531584969BOX OF 6 ASSORTED COLOUR TEASPOONS13/1/2011 14:148.29NaNUnited Kingdom
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "73 545230 20960 WATERMELON BATH SPONGE 1 \n", + "74 545230 21082 SET/20 FRUIT SALAD PAPER NAPKINS 1 \n", + "75 545230 21488 RED WHITE SCARF HOT WATER BOTTLE 1 \n", + "76 545230 35970 ZINC FOLKART SLEIGH BELLS 1 \n", + "77 545230 82583 HOT BATHS METAL SIGN 1 \n", + "78 545230 82583 HOT BATHS METAL SIGN 7 \n", + "338 545299 21730 GLASS STAR FROSTED T-LIGHT HOLDER 1 \n", + "540 545315 82482 WOODEN PICTURE FRAME WHITE FINISH 2 \n", + "541 545315 82600 NO SINGING METAL SIGN 1 \n", + "542 545315 84969 BOX OF 6 ASSORTED COLOUR TEASPOONS 1 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country \n", + "73 3/1/2011 9:55 2.46 NaN United Kingdom \n", + "74 3/1/2011 9:55 1.63 NaN United Kingdom \n", + "75 3/1/2011 9:55 8.29 NaN United Kingdom \n", + "76 3/1/2011 9:55 4.13 NaN United Kingdom \n", + "77 3/1/2011 9:55 4.13 NaN United Kingdom \n", + "78 3/1/2011 9:55 4.96 NaN United Kingdom \n", + "338 3/1/2011 12:19 4.95 NaN United Kingdom \n", + "540 3/1/2011 14:14 4.96 NaN United Kingdom \n", + "541 3/1/2011 14:14 4.13 NaN United Kingdom \n", + "542 3/1/2011 14:14 8.29 NaN United Kingdom " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.CustomerID.isna()].head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "59942" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.CustomerID.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Why missing CustID?\n", + "- possibly guest checkout feature on the website\n", + "\n", + "\n", + "What could be possible features that you would collect if you want to segment \"guest\" customers?\n", + " - browser, IP, location, cookie" + ] }, { "cell_type": "markdown", @@ -70,10 +526,24 @@ }, { "cell_type": 
"code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "(176137, 8)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.dropna(subset=['CustomerID'], inplace=True)\n", + "data.shape" + ] }, { "cell_type": "markdown", @@ -108,10 +578,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "'9/9/2011 9:52'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.InvoiceDate.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2011-12-09\n" + ] + } + ], + "source": [ + "now = dt.date(2011, 12, 9)\n", + "print(now)" + ] }, { "cell_type": "markdown", @@ -122,10 +623,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountrydate
054522021955DOORMAT UNION JACK GUNS AND ROSES23/1/2011 8:307.9514620.0United Kingdom2011-03-01
154522048194DOORMAT HEARTS23/1/2011 8:307.9514620.0United Kingdom2011-03-01
254522022556PLASTERS IN TIN CIRCUS PARADE123/1/2011 8:301.6514620.0United Kingdom2011-03-01
354522022139RETROSPOT TEA SET CERAMIC 11 PC33/1/2011 8:304.9514620.0United Kingdom2011-03-01
454522084029GKNITTED UNION FLAG HOT WATER BOTTLE43/1/2011 8:303.7514620.0United Kingdom2011-03-01
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "0 545220 21955 DOORMAT UNION JACK GUNS AND ROSES 2 \n", + "1 545220 48194 DOORMAT HEARTS 2 \n", + "2 545220 22556 PLASTERS IN TIN CIRCUS PARADE 12 \n", + "3 545220 22139 RETROSPOT TEA SET CERAMIC 11 PC 3 \n", + "4 545220 84029G KNITTED UNION FLAG HOT WATER BOTTLE 4 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country date \n", + "0 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 \n", + "1 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 \n", + "2 3/1/2011 8:30 1.65 14620.0 United Kingdom 2011-03-01 \n", + "3 3/1/2011 8:30 4.95 14620.0 United Kingdom 2011-03-01 \n", + "4 3/1/2011 8:30 3.75 14620.0 United Kingdom 2011-03-01 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['date'] = pd.DatetimeIndex(data.InvoiceDate).date\n", + "\n", + "data.head()" + ] }, { "cell_type": "markdown", @@ -136,10 +759,168 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerIDLastPurchaseDate
012747.02011-08-22
112748.02011-09-30
212749.02011-08-01
312820.02011-09-26
412821.02011-05-09
\n", + "
" + ], + "text/plain": [ + " CustomerID LastPurchaseDate\n", + "0 12747.0 2011-08-22\n", + "1 12748.0 2011-09-30\n", + "2 12749.0 2011-08-01\n", + "3 12820.0 2011-09-26\n", + "4 12821.0 2011-05-09" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recency_df = data.groupby('CustomerID', as_index=False).date.max()\n", + "recency_df.columns = ['CustomerID', 'LastPurchaseDate']\n", + "recency_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerIDLastPurchaseDateRecency
012747.02011-08-22109
112748.02011-09-3070
212749.02011-08-01130
312820.02011-09-2674
412821.02011-05-09214
\n", + "
" + ], + "text/plain": [ + " CustomerID LastPurchaseDate Recency\n", + "0 12747.0 2011-08-22 109\n", + "1 12748.0 2011-09-30 70\n", + "2 12749.0 2011-08-01 130\n", + "3 12820.0 2011-09-26 74\n", + "4 12821.0 2011-05-09 214" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recency_df['Recency'] = recency_df.LastPurchaseDate.apply(lambda x: (now-x).days)\n", + "recency_df.head()" + ] }, { "cell_type": "markdown", @@ -158,10 +939,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountrydate
054522021955DOORMAT UNION JACK GUNS AND ROSES23/1/2011 8:307.9514620.0United Kingdom2011-03-01
1554522122021BLUE FELT EASTER EGG BASKET63/1/2011 8:351.6514740.0United Kingdom2011-03-01
4554522222957SET 3 PAPER VINTAGE CHICK PAPER EGG63/1/2011 8:492.9513880.0United Kingdom2011-03-01
5454522322487WHITE WOOD GARDEN PLANT LADDER43/1/2011 8:588.5016462.0United Kingdom2011-03-01
5554522422664TOY TIDY DOLLY GIRL DESIGN53/1/2011 9:032.1017068.0United Kingdom2011-03-01
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "0 545220 21955 DOORMAT UNION JACK GUNS AND ROSES 2 \n", + "15 545221 22021 BLUE FELT EASTER EGG BASKET 6 \n", + "45 545222 22957 SET 3 PAPER VINTAGE CHICK PAPER EGG 6 \n", + "54 545223 22487 WHITE WOOD GARDEN PLANT LADDER 4 \n", + "55 545224 22664 TOY TIDY DOLLY GIRL DESIGN 5 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country date \n", + "0 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 \n", + "15 3/1/2011 8:35 1.65 14740.0 United Kingdom 2011-03-01 \n", + "45 3/1/2011 8:49 2.95 13880.0 United Kingdom 2011-03-01 \n", + "54 3/1/2011 8:58 8.50 16462.0 United Kingdom 2011-03-01 \n", + "55 3/1/2011 9:03 2.10 17068.0 United Kingdom 2011-03-01 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "invoice_level_data = data.copy()\n", + "invoice_level_data.drop_duplicates(subset=['InvoiceNo', 'CustomerID'], keep='first', inplace=True)\n", + "invoice_level_data.head()" + ] }, { "cell_type": "markdown", @@ -172,10 +1075,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerIDFrequency
012747.05
112748.096
212749.03
312820.01
412821.01
\n", + "
" + ], + "text/plain": [ + " CustomerID Frequency\n", + "0 12747.0 5\n", + "1 12748.0 96\n", + "2 12749.0 3\n", + "3 12820.0 1\n", + "4 12821.0 1" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frequency_df = invoice_level_data.groupby('CustomerID', as_index=False).InvoiceNo.count()\n", + "frequency_df.columns = ['CustomerID', 'Frequency']\n", + "frequency_df.head()" + ] }, { "cell_type": "markdown", @@ -190,10 +1166,225 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "data['TotalCost'] = data.Quantity * data.UnitPrice" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountrydateTotalCost
054522021955DOORMAT UNION JACK GUNS AND ROSES23/1/2011 8:307.9514620.0United Kingdom2011-03-0115.90
154522048194DOORMAT HEARTS23/1/2011 8:307.9514620.0United Kingdom2011-03-0115.90
254522022556PLASTERS IN TIN CIRCUS PARADE123/1/2011 8:301.6514620.0United Kingdom2011-03-0119.80
354522022139RETROSPOT TEA SET CERAMIC 11 PC33/1/2011 8:304.9514620.0United Kingdom2011-03-0114.85
454522084029GKNITTED UNION FLAG HOT WATER BOTTLE43/1/2011 8:303.7514620.0United Kingdom2011-03-0115.00
\n", + "
" + ], + "text/plain": [ + " InvoiceNo StockCode Description Quantity \\\n", + "0 545220 21955 DOORMAT UNION JACK GUNS AND ROSES 2 \n", + "1 545220 48194 DOORMAT HEARTS 2 \n", + "2 545220 22556 PLASTERS IN TIN CIRCUS PARADE 12 \n", + "3 545220 22139 RETROSPOT TEA SET CERAMIC 11 PC 3 \n", + "4 545220 84029G KNITTED UNION FLAG HOT WATER BOTTLE 4 \n", + "\n", + " InvoiceDate UnitPrice CustomerID Country date TotalCost \n", + "0 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 15.90 \n", + "1 3/1/2011 8:30 7.95 14620.0 United Kingdom 2011-03-01 15.90 \n", + "2 3/1/2011 8:30 1.65 14620.0 United Kingdom 2011-03-01 19.80 \n", + "3 3/1/2011 8:30 4.95 14620.0 United Kingdom 2011-03-01 14.85 \n", + "4 3/1/2011 8:30 3.75 14620.0 United Kingdom 2011-03-01 15.00 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerIDMonetary
012747.01760.09
112748.014680.85
212749.02755.23
312820.0217.77
412821.092.72
\n", + "
" + ], + "text/plain": [ + " CustomerID Monetary\n", + "0 12747.0 1760.09\n", + "1 12748.0 14680.85\n", + "2 12749.0 2755.23\n", + "3 12820.0 217.77\n", + "4 12821.0 92.72" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "monetary_df = data.groupby('CustomerID', as_index=False).TotalCost.sum()\n", + "monetary_df.columns = ['CustomerID', 'Monetary']\n", + "monetary_df.head()" + ] }, { "cell_type": "markdown", @@ -204,10 +1395,107 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 82, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LastPurchaseDateRecencyFrequencyMonetary
CustomerID
12747.02011-08-2210951760.09
12748.02011-09-30709614680.85
12749.02011-08-0113032755.23
12820.02011-09-26741217.77
12821.02011-05-09214192.72
\n", + "
" + ], + "text/plain": [ + " LastPurchaseDate Recency Frequency Monetary\n", + "CustomerID \n", + "12747.0 2011-08-22 109 5 1760.09\n", + "12748.0 2011-09-30 70 96 14680.85\n", + "12749.0 2011-08-01 130 3 2755.23\n", + "12820.0 2011-09-26 74 1 217.77\n", + "12821.0 2011-05-09 214 1 92.72" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df = (recency_df\n", + " .merge(frequency_df, on='CustomerID')\n", + " .merge(monetary_df, on='CustomerID')\n", + ")\n", + "\n", + "rfm_df.set_index('CustomerID', inplace=True)\n", + "rfm_df.head()" + ] }, { "cell_type": "markdown", @@ -227,6 +1515,123 @@ "### Find RFM quartiles" ] }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RecencyFrequencyMonetary
0.0070.01.02.9000
0.2585.01.0258.0775
0.50119.02.0518.3500
0.75183.03.01182.9725
1.00283.096.0141789.3200
\n", + "
" + ], + "text/plain": [ + " Recency Frequency Monetary\n", + "0.00 70.0 1.0 2.9000\n", + "0.25 85.0 1.0 258.0775\n", + "0.50 119.0 2.0 518.3500\n", + "0.75 183.0 3.0 1182.9725\n", + "1.00 283.0 96.0 141789.3200" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "quantiles = rfm_df.quantile(q=[0,0.25,0.5,0.75,1])\n", + "quantiles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since we have duplicate bin edges for `Frequency` column, we can custom define the range for it" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0 1.0\n", + "0.5 2.0\n", + "0.7 3.0\n", + "0.8 4.0\n", + "1.0 96.0\n", + "Name: Frequency, dtype: float64" + ] + }, + "execution_count": 164, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.Frequency.quantile(q=[0,0.5,0.7,0.8,1])" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -246,10 +1651,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 166, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "CustomerID\n", + "12747.0 3\n", + "12748.0 4\n", + "12749.0 2\n", + "12820.0 4\n", + "12821.0 1\n", + "Name: Recency, dtype: category\n", + "Categories (4, int64): [4 < 3 < 2 < 1]" + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_labels = range(4,0,-1)\n", + "r_groups = pd.qcut(rfm_df.Recency, q=4, labels=r_labels)\n", + "r_groups.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CustomerID\n", + "12747.0 4\n", + "12748.0 4\n", + "12749.0 4\n", + "12820.0 1\n", + "12821.0 1\n", + "Name: Monetary, dtype: category\n", + "Categories (4, int64): [1 < 2 < 3 < 4]" + ] + }, + "execution_count": 167, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "m_labels = range(1,5)\n", + "m_groups = pd.qcut(rfm_df.Monetary, q=4, labels=m_labels)\n", + "m_groups.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CustomerID\n", + "12747.0 4\n", + "12748.0 4\n", + "12749.0 2\n", + "12820.0 1\n", + "12821.0 1\n", + "Name: Frequency, dtype: category\n", + "Categories (4, int64): [1 < 2 < 3 < 4]" + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f_labels = range(1,5)\n", + "f_groups = pd.qcut(rfm_df.Frequency, q=[0,0.5,0.7,0.8,1], labels=f_labels)\n", + "\n", + "f_groups.head()" + ] }, { "cell_type": "markdown", @@ -260,24 +1746,396 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 170, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "rfm_df = rfm_df.assign(R=r_groups,F=f_groups,M=m_groups)" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LastPurchaseDateRecencyFrequencyMonetaryRFM
CustomerID
12747.02011-08-2210951760.09344
12748.02011-09-30709614680.85444
12749.02011-08-0113032755.23224
12820.02011-09-26741217.77411
12821.02011-05-09214192.72111
........................
18280.02011-03-072771180.60111
18281.02011-06-12180180.82211
18282.02011-08-051261100.21211
18283.02011-09-05958802.77343
18287.02011-05-222011765.28113
\n", + "

2864 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " LastPurchaseDate Recency Frequency Monetary R F M\n", + "CustomerID \n", + "12747.0 2011-08-22 109 5 1760.09 3 4 4\n", + "12748.0 2011-09-30 70 96 14680.85 4 4 4\n", + "12749.0 2011-08-01 130 3 2755.23 2 2 4\n", + "12820.0 2011-09-26 74 1 217.77 4 1 1\n", + "12821.0 2011-05-09 214 1 92.72 1 1 1\n", + "... ... ... ... ... .. .. ..\n", + "18280.0 2011-03-07 277 1 180.60 1 1 1\n", + "18281.0 2011-06-12 180 1 80.82 2 1 1\n", + "18282.0 2011-08-05 126 1 100.21 2 1 1\n", + "18283.0 2011-09-05 95 8 802.77 3 4 3\n", + "18287.0 2011-05-22 201 1 765.28 1 1 3\n", + "\n", + "[2864 rows x 7 columns]" + ] + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LastPurchaseDateRecencyFrequencyMonetaryRFMRFM_segmentRFM_score
CustomerID
12747.02011-08-2210951760.0934434411.0
12748.02011-09-30709614680.8544444412.0
12749.02011-08-0113032755.232242248.0
12820.02011-09-26741217.774114116.0
12821.02011-05-09214192.721111113.0
\n", + "
" + ], + "text/plain": [ + " LastPurchaseDate Recency Frequency Monetary R F M \\\n", + "CustomerID \n", + "12747.0 2011-08-22 109 5 1760.09 3 4 4 \n", + "12748.0 2011-09-30 70 96 14680.85 4 4 4 \n", + "12749.0 2011-08-01 130 3 2755.23 2 2 4 \n", + "12820.0 2011-09-26 74 1 217.77 4 1 1 \n", + "12821.0 2011-05-09 214 1 92.72 1 1 1 \n", + "\n", + " RFM_segment RFM_score \n", + "CustomerID \n", + "12747.0 344 11.0 \n", + "12748.0 444 12.0 \n", + "12749.0 224 8.0 \n", + "12820.0 411 6.0 \n", + "12821.0 111 3.0 " + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df['RFM_segment'] = rfm_df.apply(lambda x : '{}{}{}'.format(x.R , x.F, x.M), axis=1)\n", + "rfm_df['RFM_score'] = rfm_df.loc[:,['R','F','M']].sum(axis=1)\n", + "rfm_df.head()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### FInd out the best customers" + "### Find out the best customers" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 209, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "array([12748., 12839., 12901., 12910., 12921., 12957., 12971., 13004.,\n", + " 13014., 13018., 13021., 13078., 13089., 13094., 13097., 13102.,\n", + " 13178., 13263., 13266., 13268., 13384., 13394., 13408., 13418.,\n", + " 13468., 13488., 13576., 13599., 13630., 13694., 13695., 13704.,\n", + " 13767., 13777., 13798., 13842., 13862., 13881., 13985., 14004.,\n", + " 14031., 14056., 14060., 14062., 14096., 14159., 14191., 14194.,\n", + " 14221., 14227., 14235., 14239., 14282., 14298., 14367., 14395.,\n", + " 14401., 14456., 14462., 14524., 14527., 14543., 14562., 14606.,\n", + " 14659., 14667., 14676., 14680., 14688., 14709., 14735., 14755.,\n", + " 14769., 14800., 14808., 14854., 14868., 14944., 14952., 14961.,\n", + " 15005., 15039., 15044., 15061., 15078., 15114., 15140., 15144.,\n", + " 15150., 15152., 15187., 15194., 15218., 15241., 15290., 15301.,\n", + " 
15311., 15356., 15358., 15410., 15465., 15498., 15547., 15555.,\n", + " 15640., 15674., 15796., 15804., 15827., 15838., 15867., 15955.,\n", + " 15981., 15984., 16011., 16013., 16029., 16033., 16076., 16103.,\n", + " 16133., 16145., 16156., 16161., 16168., 16187., 16326., 16407.,\n", + " 16422., 16458., 16523., 16525., 16558., 16607., 16626., 16656.,\n", + " 16672., 16681., 16700., 16705., 16709., 16710., 16713., 16729.,\n", + " 16746., 16779., 16813., 16818., 16839., 16928., 16931., 16945.,\n", + " 17017., 17049., 17061., 17068., 17069., 17220., 17238., 17243.,\n", + " 17306., 17315., 17340., 17377., 17389., 17402., 17416., 17428.,\n", + " 17450., 17491., 17511., 17576., 17581., 17611., 17613., 17644.,\n", + " 17651., 17652., 17656., 17669., 17675., 17677., 17686., 17716.,\n", + " 17719., 17725., 17730., 17750., 17757., 17758., 17811., 17841.,\n", + " 17857., 17858., 17865., 17920., 17949., 17997., 18008., 18041.,\n", + " 18094., 18102., 18109., 18118., 18144., 18172., 18198., 18225.,\n", + " 18226., 18229., 18241.])" + ] + }, + "execution_count": 209, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.loc[rfm_df.RFM_segment == '444'].index.values" + ] }, { "cell_type": "markdown", @@ -285,19 +2143,198 @@ "source": [ "## Learner Activity\n", "\n", - "**1. Find the following:**\n", - "1. Best Customer\n", - "\n", - "2. Loyal Customer\n", - "\n", - "3. Big Spenders\n", - "\n", - "4. Almost lost customers\n", + "**1. Find the following:**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Best Customer \n", "\n", - "5. Lost customers\n", + "- See above filter RFM_seg = 444" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. 
Loyal Customer \n", + "\n", + "- we treat our most frequent customers as our loyal customers, hence filter RFM_seg = x4x" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "444 211\n", + "344 155\n", + "244 32\n", + "343 30\n", + "443 26\n", + "243 12\n", + "143 5\n", + "144 1\n", + "242 1\n", + "442 1\n", + "Name: RFM_segment, dtype: int64" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.loc[rfm_df.F == 4,'RFM_segment'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3. Big Spenders \n", + "- since the Monetary value distribution is skewed, we consider our big spenders to be the customers with RFM_seg = xx3 or xx4 (M >= 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "444 253\n", + "344 214\n", + "233 109\n", + "333 103\n", + "433 85\n", + "343 63\n", + "244 60\n", + "443 44\n", + "243 41\n", + "133 38\n", + "234 38\n", + "323 37\n", + "423 37\n", + "434 33\n", + "334 32\n", + "113 31\n", + "223 30\n", + "123 29\n", + "413 25\n", + "213 23\n", + "424 15\n", + "224 14\n", + "313 14\n", + "324 13\n", + "124 11\n", + "114 8\n", + "143 7\n", + "134 7\n", + "414 6\n", + "214 5\n", + "144 4\n", + "314 3\n", + "Name: RFM_segment, dtype: int64" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.loc[rfm_df.M >= 3,'RFM_segment'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "4. 
Almost lost customers \n", + "- RFM_seg with R <= 2, F <= 2 and M <= 2 \n", + "- or, if you have periodic data (e.g. for every financial quarter: FY Q1, FY Q2, FY Q3), check the customer trend" + ] + }, + { + "cell_type": "code", + "execution_count": 202, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "111 333\n", + "112 234\n", + "211 192\n", + "212 163\n", + "222 22\n", + "122 8\n", + "121 5\n", + "221 3\n", + "Name: RFM_segment, dtype: int64" + ] + }, + "execution_count": 202, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.query('R<=2 and F<=2 and M<=2').RFM_segment.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "5. Lost customers \n", + "- customers with the lowest score, RFM_seg = 111 " + ] + }, + { + "cell_type": "code", + "execution_count": 203, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "111 333\n", + "Name: RFM_segment, dtype: int64" + ] + }, + "execution_count": 203, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfm_df.loc[rfm_df.RFM_segment=='111','RFM_segment'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "**2. 
Now that we know our customers segments, how will you target them?**" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- best -> make them feel valued: send vouchers on birthdays and anniversaries\n", + "- loyal -> upsell, cross-sell\n", + "- almost lost / lost -> discounts" + ] + } ], "metadata": { diff --git a/Daily_Power_Generation/notebook/daily_power_generation-MK.ipynb b/Daily_Power_Generation/notebook/daily_power_generation-MK.ipynb new file mode 100644 index 0000000..a66dca3 --- /dev/null +++ b/Daily_Power_Generation/notebook/daily_power_generation-MK.ipynb @@ -0,0 +1,3393 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "IKVe6h6kov4X" + }, + "source": [ + "# Daily Power Generation Data Cleaning and Analysis\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "fIJD74wYozv5" + }, + "source": [ + "\n", + "India is the world's third-largest producer and third-largest consumer of electricity. The national electric grid in India has an installed capacity of 370.106 GW as of 31 March 2020. Renewable power plants, which also include large hydroelectric plants, constitute 35.86% of India's total installed capacity.\n", + "India has a surplus power generation capacity but lacks adequate distribution infrastructure.\n", + "\n", + "India's electricity sector is dominated by fossil fuels, in particular coal, which during the 2018-19 fiscal year produced about three-quarters of the country's electricity. The government is making efforts to increase investment in renewable energy. 
The government's National Electricity Plan of 2018 states that the country does not need more non-renewable power plants in the utility sector until 2027, with the commissioning of 50,025 MW coal-based power plants under construction and addition of 275,000 MW total renewable power capacity after the retirement of nearly 48,000 MW old coal-fired plants.\n", + "\n", + "India has recorded rapid growth in electricity generation since 1985, increasing from 179 TW-hr in 1985 to 1,057 TW-hr in 2012. The majority of the increase came from coal-fired plants and non-conventional renewable energy sources (RES), with the contribution from natural gas, oil, and hydro plants decreasing in 2012-2017. The gross utility electricity generation (excluding imports from Bhutan) was 1,384 billion kWh in 2019-20, representing 1.0% annual growth compared to 2018-2019. The contribution from renewable energy sources was nearly 20% of the total. In the year 2019-20, all the incremental electricity generation was contributed by renewable energy sources as the power generation from fossil fuels decreased.\n", + "The drivers for India's electricity sector are its rapidly growing economy, rising exports, improving infrastructure, and increasing household incomes.\n", + "\n", + "\n", + "## Data\n", + "There are 2 CSV files for the study. Each file comes with detailed file-, row-, and column-level descriptions to make it easier to understand.\n", + "\n", + "## Acknowledgements\n", + "Data has been extracted from openly available reports of the National Power Portal at \"https://npp.gov.in/\". 
See more details [here](https://www.kaggle.com/navinmundhra/daily-power-generation-in-india-20172020)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Resources:\n", + "- [Pandas Groupby Named Aggregations](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#groupby-aggregate-named)\n", + "- [Pandas Select columns that startswith / endswith](https://stackoverflow.com/a/27275344/8210613)\n", + "- [Pandas Visuals](https://pandas.pydata.org/docs/user_guide/visualization.html#pie-plot)\n", + "- [Pandas Visuals custom pct - pie plot](https://stackoverflow.com/a/6170354/8210613)\n", + "- [Pandas Sum rows](https://www.kite.com/python/answers/how-to-sum-rows-of-a-pandas-dataframe-in-python)\n", + "- [Pandas Convert string to float](https://datatofish.com/convert-string-to-float-dataframe/)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "CsOsqQuCozb3" + }, + "source": [ + "## Session flow:\n", + "\n", + "* Data Cleaning and basic analysis will be done for the first 90-100 minutes.\n", + "\n", + "* Brainstorming activity to form at least 3 questions to be answered, then individually code the answers to get the desired output (20-30 minutes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "4oIXyK75o71U" + }, + "source": [ + "## Load the libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "DT0e5F87ox4n" + }, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "pzhl8usGpH7m" + }, + "source": [ + "## Load the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "fgem045TpLVr" + }, + "outputs": [ + { + "data": { + 
"text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexDateRegionThermal Generation Actual (in MU)Thermal Generation Estimated (in MU)Nuclear Generation Actual (in MU)Nuclear Generation Estimated (in MU)Hydro Generation Actual (in MU)Hydro Generation Estimated (in MU)
002017-09-01Northern624.23484.2130.3635.57273.27320.81
112017-09-01Western1,106.891,024.3325.173.8172.0021.53
222017-09-01Southern576.66578.5562.7349.80111.5764.78
332017-09-01Eastern441.02429.39NaNNaN85.9469.36
442017-09-01NorthEastern29.1115.91NaNNaN24.6421.21
\n", + "
" + ], + "text/plain": [ + " index Date Region Thermal Generation Actual (in MU) \\\n", + "0 0 2017-09-01 Northern 624.23 \n", + "1 1 2017-09-01 Western 1,106.89 \n", + "2 2 2017-09-01 Southern 576.66 \n", + "3 3 2017-09-01 Eastern 441.02 \n", + "4 4 2017-09-01 NorthEastern 29.11 \n", + "\n", + " Thermal Generation Estimated (in MU) Nuclear Generation Actual (in MU) \\\n", + "0 484.21 30.36 \n", + "1 1,024.33 25.17 \n", + "2 578.55 62.73 \n", + "3 429.39 NaN \n", + "4 15.91 NaN \n", + "\n", + " Nuclear Generation Estimated (in MU) Hydro Generation Actual (in MU) \\\n", + "0 35.57 273.27 \n", + "1 3.81 72.00 \n", + "2 49.80 111.57 \n", + "3 NaN 85.94 \n", + "4 NaN 24.64 \n", + "\n", + " Hydro Generation Estimated (in MU) \n", + "0 320.81 \n", + "1 21.53 \n", + "2 64.78 \n", + "3 69.36 \n", + "4 21.21 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df = pd.read_csv('../data/power_generation.csv') \n", + "power_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
State / Union territory (UT)Area (km2)RegionNational Share (%)
0Rajasthan342239Northern10.55
1Madhya Pradesh308350Central9.37
2Maharashtra307713Western9.36
3Uttar Pradesh240928Northern7.33
4Gujarat196024Western5.96
\n", + "
" + ], + "text/plain": [ + " State / Union territory (UT) Area (km2) Region National Share (%)\n", + "0 Rajasthan 342239 Northern 10.55\n", + "1 Madhya Pradesh 308350 Central 9.37\n", + "2 Maharashtra 307713 Western 9.36\n", + "3 Uttar Pradesh 240928 Northern 7.33\n", + "4 Gujarat 196024 Western 5.96" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df = pd.read_csv('../data/State_Region_corrected.csv')\n", + "states_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "tTwjxqDSpNyL" + }, + "source": [ + "## Remove the column `index` from power_df dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "cXcK0DxKpkIv" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateRegionThermal Generation Actual (in MU)Thermal Generation Estimated (in MU)Nuclear Generation Actual (in MU)Nuclear Generation Estimated (in MU)Hydro Generation Actual (in MU)Hydro Generation Estimated (in MU)
02017-09-01Northern624.23484.2130.3635.57273.27320.81
12017-09-01Western1,106.891,024.3325.173.8172.0021.53
22017-09-01Southern576.66578.5562.7349.80111.5764.78
32017-09-01Eastern441.02429.39NaNNaN85.9469.36
42017-09-01NorthEastern29.1115.91NaNNaN24.6421.21
\n", + "
" + ], + "text/plain": [ + " Date Region Thermal Generation Actual (in MU) \\\n", + "0 2017-09-01 Northern 624.23 \n", + "1 2017-09-01 Western 1,106.89 \n", + "2 2017-09-01 Southern 576.66 \n", + "3 2017-09-01 Eastern 441.02 \n", + "4 2017-09-01 NorthEastern 29.11 \n", + "\n", + " Thermal Generation Estimated (in MU) Nuclear Generation Actual (in MU) \\\n", + "0 484.21 30.36 \n", + "1 1,024.33 25.17 \n", + "2 578.55 62.73 \n", + "3 429.39 NaN \n", + "4 15.91 NaN \n", + "\n", + " Nuclear Generation Estimated (in MU) Hydro Generation Actual (in MU) \\\n", + "0 35.57 273.27 \n", + "1 3.81 72.00 \n", + "2 49.80 111.57 \n", + "3 NaN 85.94 \n", + "4 NaN 24.64 \n", + "\n", + " Hydro Generation Estimated (in MU) \n", + "0 320.81 \n", + "1 21.53 \n", + "2 64.78 \n", + "3 69.36 \n", + "4 21.21 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.drop(columns='index', axis=1, inplace=True)\n", + "power_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "fl3mMuAgpl5G" + }, + "source": [ + "## Cleaning the power_df dataframe column names :\n", + "* Remove the substring `' (in MU)'` from all the columns in the power_df dataframe.\n", + "* Replace all the spaces with underscore in the power_df datafame\n", + "* All the column names to be converted to small case in the power_df dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "vGBuE_oTpqMf" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['date',\n", + " 'region',\n", + " 'thermal_generation_actual',\n", + " 'thermal_generation_estimated',\n", + " 'nuclear_generation_actual',\n", + " 'nuclear_generation_estimated',\n", + " 'hydro_generation_actual',\n", + " 'hydro_generation_estimated']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols= [] \n", + "for col 
in power_df.columns:\n", + " cols.append(col.replace(' (in MU)', '').replace(' ','_').lower())\n", + " \n", + "cols" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "vGBuE_oTpqMf" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateregionthermal_generation_actualthermal_generation_estimatednuclear_generation_actualnuclear_generation_estimatedhydro_generation_actualhydro_generation_estimated
02017-09-01Northern624.23484.2130.3635.57273.27320.81
12017-09-01Western1,106.891,024.3325.173.8172.0021.53
22017-09-01Southern576.66578.5562.7349.80111.5764.78
32017-09-01Eastern441.02429.39NaNNaN85.9469.36
42017-09-01NorthEastern29.1115.91NaNNaN24.6421.21
\n", + "
" + ], + "text/plain": [ + " date region thermal_generation_actual \\\n", + "0 2017-09-01 Northern 624.23 \n", + "1 2017-09-01 Western 1,106.89 \n", + "2 2017-09-01 Southern 576.66 \n", + "3 2017-09-01 Eastern 441.02 \n", + "4 2017-09-01 NorthEastern 29.11 \n", + "\n", + " thermal_generation_estimated nuclear_generation_actual \\\n", + "0 484.21 30.36 \n", + "1 1,024.33 25.17 \n", + "2 578.55 62.73 \n", + "3 429.39 NaN \n", + "4 15.91 NaN \n", + "\n", + " nuclear_generation_estimated hydro_generation_actual \\\n", + "0 35.57 273.27 \n", + "1 3.81 72.00 \n", + "2 49.80 111.57 \n", + "3 NaN 85.94 \n", + "4 NaN 24.64 \n", + "\n", + " hydro_generation_estimated \n", + "0 320.81 \n", + "1 21.53 \n", + "2 64.78 \n", + "3 69.36 \n", + "4 21.21 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.columns = [col.replace(' (in MU)', '').replace(' ','_').lower() for col in power_df.columns]\n", + "power_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UK3mNYdGprSP" + }, + "source": [ + "## Cleaning state_df column names:\n", + "* Replace the column names 'State / Union territory (UT)', 'Area (km2)', 'Region' and 'National Share (%)' with 'state','area','region' and 'national_share' respectively" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "MhAVWRhkpu9t" + }, + "outputs": [], + "source": [ + "state_cols = {\n", + " 'State / Union territory (UT)' : 'state',\n", + " 'Area (km2)' : 'area',\n", + " 'National Share (%)': 'national_share',\n", + " 'Region':'region'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "MhAVWRhkpu9t" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statearearegionnational_share
0Rajasthan342239Northern10.55
1Madhya Pradesh308350Central9.37
2Maharashtra307713Western9.36
3Uttar Pradesh240928Northern7.33
4Gujarat196024Western5.96
\n", + "
" + ], + "text/plain": [ + " state area region national_share\n", + "0 Rajasthan 342239 Northern 10.55\n", + "1 Madhya Pradesh 308350 Central 9.37\n", + "2 Maharashtra 307713 Western 9.36\n", + "3 Uttar Pradesh 240928 Northern 7.33\n", + "4 Gujarat 196024 Western 5.96" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df.rename(columns=state_cols, inplace=True)\n", + "states_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 10.55\n", + "1 9.37\n", + "2 9.36\n", + "3 7.33\n", + "4 5.96\n", + "Name: national_share, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df.national_share.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "3zBHvMndpw6P" + }, + "source": [ + "## The region names in both the dataframes have to be the same, check for the same, if not, make the necessary changes" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "k3KHNOuyp1oP" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Northern', 'Western', 'Southern', 'Eastern', 'NorthEastern'],\n", + " dtype=object)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.region.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Northern', 'Central', 'Western', 'Southern', 'Eastern',\n", + " 'Northeastern'], dtype=object)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df.region.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "array(['Northern', 'Central', 'Western', 'Southern', 'Eastern',\n", + " 'NorthEastern'], dtype=object)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df.replace('Northeastern', 'NorthEastern', inplace=True)\n", + "states_df.region.unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "O8WeQOt1p2wW" + }, + "source": [ + "## Basic Data Study" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CBIZN4Lzp9MU" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 4945 entries, 0 to 4944\n", + "Data columns (total 8 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 4945 non-null object \n", + " 1 region 4945 non-null object \n", + " 2 thermal_generation_actual 4945 non-null object \n", + " 3 thermal_generation_estimated 4945 non-null object \n", + " 4 nuclear_generation_actual 2967 non-null float64\n", + " 5 nuclear_generation_estimated 2967 non-null float64\n", + " 6 hydro_generation_actual 4945 non-null float64\n", + " 7 hydro_generation_estimated 4945 non-null float64\n", + "dtypes: float64(4), object(4)\n", + "memory usage: 309.2+ KB\n" + ] + } + ], + "source": [ + "power_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 34 entries, 0 to 33\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 state 34 non-null object \n", + " 1 area 34 non-null int64 \n", + " 2 region 34 non-null object \n", + " 3 national_share 34 non-null float64\n", + "dtypes: float64(1), int64(1), object(2)\n", + "memory usage: 1.2+ KB\n" + ] + } + ], + "source": [ + 
"states_df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nuclear_generation_actualnuclear_generation_estimatedhydro_generation_actualhydro_generation_estimated
count2967.0000002967.0000004945.0000004945.000000
mean37.24220836.98787773.30592176.842965
std15.88396811.49129274.48214582.043952
min0.0000000.0000000.0000000.000000
25%26.14000030.19000026.91000023.310000
50%30.72000034.84000052.96000050.270000
75%46.83000043.07500085.94000095.800000
max68.74000076.640000348.720000397.380000
\n", + "
" + ], + "text/plain": [ + " nuclear_generation_actual nuclear_generation_estimated \\\n", + "count 2967.000000 2967.000000 \n", + "mean 37.242208 36.987877 \n", + "std 15.883968 11.491292 \n", + "min 0.000000 0.000000 \n", + "25% 26.140000 30.190000 \n", + "50% 30.720000 34.840000 \n", + "75% 46.830000 43.075000 \n", + "max 68.740000 76.640000 \n", + "\n", + " hydro_generation_actual hydro_generation_estimated \n", + "count 4945.000000 4945.000000 \n", + "mean 73.305921 76.842965 \n", + "std 74.482145 82.043952 \n", + "min 0.000000 0.000000 \n", + "25% 26.910000 23.310000 \n", + "50% 52.960000 50.270000 \n", + "75% 85.940000 95.800000 \n", + "max 348.720000 397.380000 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
areanational_share
count34.00000034.000000
mean92889.1176472.826265
std95464.8101132.914077
min114.0000000.003000
25%21392.5000000.650000
50%57409.5000001.750000
75%133907.7500004.070000
max342239.00000010.550000
\n", + "
" + ], + "text/plain": [ + " area national_share\n", + "count 34.000000 34.000000\n", + "mean 92889.117647 2.826265\n", + "std 95464.810113 2.914077\n", + "min 114.000000 0.003000\n", + "25% 21392.500000 0.650000\n", + "50% 57409.500000 1.750000\n", + "75% 133907.750000 4.070000\n", + "max 342239.000000 10.550000" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insight\n", + "- not all columns are correctly data-typed\n", + "- there are missing values in nuclear\n", + "- hydro is twice as high as nuclear\n", + "- ..." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zY4lJY_Rp-Uj" + }, + "source": [ + "## Find if there are any null values in both the dataframes, if there are, what is the possible strategy to deal with them?" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "jBU-3zSIqEyf" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "date 0\n", + "region 0\n", + "thermal_generation_actual 0\n", + "thermal_generation_estimated 0\n", + "nuclear_generation_actual 1978\n", + "nuclear_generation_estimated 1978\n", + "hydro_generation_actual 0\n", + "hydro_generation_estimated 0\n", + "dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "state 0\n", + "area 0\n", + "region 0\n", + "national_share 0\n", + "dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "WJf1HQjiqFUv" + }, + 
"source": [ + "## Subset the dataframe with only the null values and check for pattern" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CFEBkmJCqNr6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Eastern', 'NorthEastern'], dtype=object)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.loc[power_df.nuclear_generation_actual.isnull(), 'region'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Eastern', 'NorthEastern'], dtype=object)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.loc[power_df.nuclear_generation_estimated.isnull(), 'region'].unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "all the missing values are from Eastern and NorthEastern regions. Do these regions have no nuclear plants?" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nuclear_generation_actualnuclear_generation_estimatedhydro_generation_actualhydro_generation_estimated
region
Eastern0.000.0048686.6252461.95
NorthEastern0.000.0017612.3214058.66
Northern26964.0831378.94188854.16201204.03
Southern55855.2146483.2471109.3477083.80
Western27678.3431880.8536235.3435180.02
\n", + "
" + ], + "text/plain": [ + " nuclear_generation_actual nuclear_generation_estimated \\\n", + "region \n", + "Eastern 0.00 0.00 \n", + "NorthEastern 0.00 0.00 \n", + "Northern 26964.08 31378.94 \n", + "Southern 55855.21 46483.24 \n", + "Western 27678.34 31880.85 \n", + "\n", + " hydro_generation_actual hydro_generation_estimated \n", + "region \n", + "Eastern 48686.62 52461.95 \n", + "NorthEastern 17612.32 14058.66 \n", + "Northern 188854.16 201204.03 \n", + "Southern 71109.34 77083.80 \n", + "Western 36235.34 35180.02 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.groupby('region').sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ww5cCZE8qOr1" + }, + "source": [ + "### Nuclear generation columns are empty for the Eastern and NorthEastern regions. This could be due to there being no nuclear plants in those regions, so the NaN values are replaced with 0" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "dPzEGB46qTaW" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "date 0\n", + "region 0\n", + "thermal_generation_actual 0\n", + "thermal_generation_estimated 0\n", + "nuclear_generation_actual 0\n", + "nuclear_generation_estimated 0\n", + "hydro_generation_actual 0\n", + "hydro_generation_estimated 0\n", + "dtype: int64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.fillna(0, inplace=True)\n", + "power_df.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "DYV6cxi7qUZ1" + }, + "source": [ + "## Convert the thermal generation values to float in the power_df" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "9Vk5MG32qbm6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "date object\n", + 
"region object\n", + "thermal_generation_actual object\n", + "thermal_generation_estimated object\n", + "nuclear_generation_actual float64\n", + "nuclear_generation_estimated float64\n", + "hydro_generation_actual float64\n", + "hydro_generation_estimated float64\n", + "dtype: object" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "could not convert string to float: '1,106.89'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpower_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mthermal_generation_actual\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 5696\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5697\u001b[0m \u001b[0;31m# else, only a single dtype is given\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5698\u001b[0;31m \u001b[0mnew_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5699\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5700\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.7/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 581\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 582\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"astype\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 583\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mconvert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.7/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, f, filter, **kwargs)\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 442\u001b[0;31m \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 443\u001b[0m \u001b[0mresult_blocks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 444\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.7/site-packages/pandas/core/internals/blocks.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 623\u001b[0m \u001b[0mvals1d\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 624\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 625\u001b[0;31m 
\u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvals1d\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 626\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mValueError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 627\u001b[0m \u001b[0;31m# e.g. astype_nansafe can fail on object-dtype of strings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.7/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 895\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcopy\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 896\u001b[0m \u001b[0;31m# Explicit copy, or required since NumPy can't view from / to object.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 897\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 898\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 899\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: could not convert string to float: '1,106.89'" + ] + } + ], + "source": [ + "power_df.thermal_generation_actual.astype(float)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 624.23\n", + "1 1,106.89\n", + "2 576.66\n", + "3 441.02\n", + "4 29.11\n", + "Name: thermal_generation_actual, dtype: object" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.thermal_generation_actual.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "988" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.thermal_generation_actual.str.contains(',').sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "967" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.thermal_generation_estimated.str.contains(',').sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 624.23\n", + "1 NaN\n", + "2 576.66\n", + "3 441.02\n", + "4 29.11\n", + " ... 
\n", + "4940 669.47\n", + "4941 NaN\n", + "4942 494.66\n", + "4943 482.86\n", + "4944 34.42\n", + "Name: thermal_generation_actual, Length: 4945, dtype: float64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(power_df.thermal_generation_actual, errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "date object\n", + "region object\n", + "thermal_generation_actual float64\n", + "thermal_generation_estimated float64\n", + "nuclear_generation_actual float64\n", + "nuclear_generation_estimated float64\n", + "hydro_generation_actual float64\n", + "hydro_generation_estimated float64\n", + "dtype: object" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.thermal_generation_actual = power_df.thermal_generation_actual.str.replace(',','').astype(float)\n", + "power_df.thermal_generation_estimated = power_df.thermal_generation_estimated.str.replace(',','').astype(float)\n", + "power_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
thermal_generation_actualthermal_generation_estimatednuclear_generation_actualnuclear_generation_estimatedhydro_generation_actualhydro_generation_estimated
count4945.0000004945.0000004945.0000004945.0000004945.0000004945.000000
mean603.978358575.39511622.34532522.19272673.30592176.842965
std383.534208383.38729922.00688220.18985774.48214582.043952
min12.34000012.3800000.0000000.0000000.0000000.000000
25%470.050000427.4600000.0000000.00000026.91000023.310000
50%615.280000535.98000025.13000028.46000052.96000050.270000
75%689.530000672.74000034.02000036.60000085.94000095.800000
max1395.9700001442.38000068.74000076.640000348.720000397.380000
\n", + "
" + ], + "text/plain": [ + " thermal_generation_actual thermal_generation_estimated \\\n", + "count 4945.000000 4945.000000 \n", + "mean 603.978358 575.395116 \n", + "std 383.534208 383.387299 \n", + "min 12.340000 12.380000 \n", + "25% 470.050000 427.460000 \n", + "50% 615.280000 535.980000 \n", + "75% 689.530000 672.740000 \n", + "max 1395.970000 1442.380000 \n", + "\n", + " nuclear_generation_actual nuclear_generation_estimated \\\n", + "count 4945.000000 4945.000000 \n", + "mean 22.345325 22.192726 \n", + "std 22.006882 20.189857 \n", + "min 0.000000 0.000000 \n", + "25% 0.000000 0.000000 \n", + "50% 25.130000 28.460000 \n", + "75% 34.020000 36.600000 \n", + "max 68.740000 76.640000 \n", + "\n", + " hydro_generation_actual hydro_generation_estimated \n", + "count 4945.000000 4945.000000 \n", + "mean 73.305921 76.842965 \n", + "std 74.482145 82.043952 \n", + "min 0.000000 0.000000 \n", + "25% 26.910000 23.310000 \n", + "50% 52.960000 50.270000 \n", + "75% 85.940000 95.800000 \n", + "max 348.720000 397.380000 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "po8qVHobqcTO" + }, + "source": [ + "## Converting the date values to DateTime format" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ALWIMSmIqg6f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "date datetime64[ns]\n", + "region object\n", + "thermal_generation_actual float64\n", + "thermal_generation_estimated float64\n", + "nuclear_generation_actual float64\n", + "nuclear_generation_estimated float64\n", + "hydro_generation_actual float64\n", + "hydro_generation_estimated float64\n", + "dtype: object" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.date = 
pd.to_datetime(power_df.date)\n", + "power_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateregionthermal_generation_actualthermal_generation_estimatednuclear_generation_actualnuclear_generation_estimatedhydro_generation_actualhydro_generation_estimated
02017-09-01Northern624.23484.2130.3635.57273.27320.81
12017-09-01Western1106.891024.3325.173.8172.0021.53
22017-09-01Southern576.66578.5562.7349.80111.5764.78
32017-09-01Eastern441.02429.390.000.0085.9469.36
42017-09-01NorthEastern29.1115.910.000.0024.6421.21
\n", + "
" + ], + "text/plain": [ + " date region thermal_generation_actual \\\n", + "0 2017-09-01 Northern 624.23 \n", + "1 2017-09-01 Western 1106.89 \n", + "2 2017-09-01 Southern 576.66 \n", + "3 2017-09-01 Eastern 441.02 \n", + "4 2017-09-01 NorthEastern 29.11 \n", + "\n", + " thermal_generation_estimated nuclear_generation_actual \\\n", + "0 484.21 30.36 \n", + "1 1024.33 25.17 \n", + "2 578.55 62.73 \n", + "3 429.39 0.00 \n", + "4 15.91 0.00 \n", + "\n", + " nuclear_generation_estimated hydro_generation_actual \\\n", + "0 35.57 273.27 \n", + "1 3.81 72.00 \n", + "2 49.80 111.57 \n", + "3 0.00 85.94 \n", + "4 0.00 24.64 \n", + "\n", + " hydro_generation_estimated \n", + "0 320.81 \n", + "1 21.53 \n", + "2 64.78 \n", + "3 69.36 \n", + "4 21.21 " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ZSuEUGEkqiGg" + }, + "source": [ + "## Find out the region which has the highest number of states and find out which states they are.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "sBxti6YCqn4j" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statearearegionnational_share
0Rajasthan342239Northern10.550
1Madhya Pradesh308350Central9.370
2Maharashtra307713Western9.360
3Uttar Pradesh240928Northern7.330
4Gujarat196024Western5.960
5Karnataka191791Southern5.830
6Andhra Pradesh162970Southern4.870
7Odisha155707Eastern4.730
8Chhattisgarh135191Central4.110
9Tamil Nadu130058Southern3.950
10Telangana112077Southern3.490
11Bihar94163Eastern2.860
12West Bengal88752Eastern2.700
13Arunachal Pradesh83743NorthEastern2.540
14Jharkhand79714Eastern2.420
15Assam78438NorthEastern2.380
16Ladakh59146Northern1.800
17Himachal Pradesh55673Northern1.700
18Uttarakhand53483Northern1.620
19Punjab50362Northern1.530
20Haryana44212Northern1.340
21Jammu and Kashmir42241Northern1.280
22Kerala38863Southern1.180
23Meghalaya22429NorthEastern0.680
24Manipur22327NorthEastern0.680
25Mizoram21081NorthEastern0.640
26Nagaland16579NorthEastern0.500
27Tripura10486NorthEastern0.310
28Sikkim7096NorthEastern0.210
29Goa3702Western0.110
30Delhi1483Northern0.040
31Dadra and Nagar Haveli and Daman and Diu603Western0.010
32Puducherry492Southern0.010
33Chandigarh114Northern0.003
\n", + "
" + ], + "text/plain": [ + " state area region \\\n", + "0 Rajasthan 342239 Northern \n", + "1 Madhya Pradesh 308350 Central \n", + "2 Maharashtra 307713 Western \n", + "3 Uttar Pradesh 240928 Northern \n", + "4 Gujarat 196024 Western \n", + "5 Karnataka 191791 Southern \n", + "6 Andhra Pradesh 162970 Southern \n", + "7 Odisha 155707 Eastern \n", + "8 Chhattisgarh 135191 Central \n", + "9 Tamil Nadu 130058 Southern \n", + "10 Telangana 112077 Southern \n", + "11 Bihar 94163 Eastern \n", + "12 West Bengal 88752 Eastern \n", + "13 Arunachal Pradesh 83743 NorthEastern \n", + "14 Jharkhand 79714 Eastern \n", + "15 Assam 78438 NorthEastern \n", + "16 Ladakh 59146 Northern \n", + "17 Himachal Pradesh 55673 Northern \n", + "18 Uttarakhand 53483 Northern \n", + "19 Punjab 50362 Northern \n", + "20 Haryana 44212 Northern \n", + "21 Jammu and Kashmir 42241 Northern \n", + "22 Kerala 38863 Southern \n", + "23 Meghalaya 22429 NorthEastern \n", + "24 Manipur 22327 NorthEastern \n", + "25 Mizoram 21081 NorthEastern \n", + "26 Nagaland 16579 NorthEastern \n", + "27 Tripura 10486 NorthEastern \n", + "28 Sikkim 7096 NorthEastern \n", + "29 Goa 3702 Western \n", + "30 Delhi 1483 Northern \n", + "31 Dadra and Nagar Haveli and Daman and Diu 603 Western \n", + "32 Puducherry 492 Southern \n", + "33 Chandigarh 114 Northern \n", + "\n", + " national_share \n", + "0 10.550 \n", + "1 9.370 \n", + "2 9.360 \n", + "3 7.330 \n", + "4 5.960 \n", + "5 5.830 \n", + "6 4.870 \n", + "7 4.730 \n", + "8 4.110 \n", + "9 3.950 \n", + "10 3.490 \n", + "11 2.860 \n", + "12 2.700 \n", + "13 2.540 \n", + "14 2.420 \n", + "15 2.380 \n", + "16 1.800 \n", + "17 1.700 \n", + "18 1.620 \n", + "19 1.530 \n", + "20 1.340 \n", + "21 1.280 \n", + "22 1.180 \n", + "23 0.680 \n", + "24 0.680 \n", + "25 0.640 \n", + "26 0.500 \n", + "27 0.310 \n", + "28 0.210 \n", + "29 0.110 \n", + "30 0.040 \n", + "31 0.010 \n", + "32 0.010 \n", + "33 0.003 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "states_df.head(100)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Northern'" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "max_state_region = states_df.groupby('region')['state'].count().idxmax()\n", + "max_state_region" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "96.093" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df.national_share.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Rajasthan', 'Uttar Pradesh', 'Ladakh', 'Himachal Pradesh',\n", + " 'Uttarakhand', 'Punjab', 'Haryana', 'Jammu and Kashmir', 'Delhi',\n", + " 'Chandigarh'], dtype=object)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df.groupby('region')['state'].unique()[max_state_region]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statearea
uniquecountsummean
region
Central[Madhya Pradesh, Chhattisgarh]2443541221770.500000
Eastern[Odisha, Bihar, West Bengal, Jharkhand]4418336104584.000000
NorthEastern[Arunachal Pradesh, Assam, Meghalaya, Manipur,...826217932772.375000
Northern[Rajasthan, Uttar Pradesh, Ladakh, Himachal Pr...1088988188988.100000
Southern[Karnataka, Andhra Pradesh, Tamil Nadu, Telang...6636251106041.833333
Western[Maharashtra, Gujarat, Goa, Dadra and Nagar Ha...4508042127010.500000
\n", + "
" + ], + "text/plain": [ + " state area \\\n", + " unique count sum \n", + "region \n", + "Central [Madhya Pradesh, Chhattisgarh] 2 443541 \n", + "Eastern [Odisha, Bihar, West Bengal, Jharkhand] 4 418336 \n", + "NorthEastern [Arunachal Pradesh, Assam, Meghalaya, Manipur,... 8 262179 \n", + "Northern [Rajasthan, Uttar Pradesh, Ladakh, Himachal Pr... 10 889881 \n", + "Southern [Karnataka, Andhra Pradesh, Tamil Nadu, Telang... 6 636251 \n", + "Western [Maharashtra, Gujarat, Goa, Dadra and Nagar Ha... 4 508042 \n", + "\n", + " \n", + " mean \n", + "region \n", + "Central 221770.500000 \n", + "Eastern 104584.000000 \n", + "NorthEastern 32772.375000 \n", + "Northern 88988.100000 \n", + "Southern 106041.833333 \n", + "Western 127010.500000 " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_df.groupby('region').agg({'state': ['unique','count'],\n", + " 'area':['sum','mean']})" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statestotal_statestotal_areaaverage_area
region
Central[Madhya Pradesh, Chhattisgarh]2443541221770.500000
Eastern[Odisha, Bihar, West Bengal, Jharkhand]4418336104584.000000
NorthEastern[Arunachal Pradesh, Assam, Meghalaya, Manipur,...826217932772.375000
Northern[Rajasthan, Uttar Pradesh, Ladakh, Himachal Pr...1088988188988.100000
Southern[Karnataka, Andhra Pradesh, Tamil Nadu, Telang...6636251106041.833333
Western[Maharashtra, Gujarat, Goa, Dadra and Nagar Ha...4508042127010.500000
\n", + "
" + ], + "text/plain": [ + " states total_states \\\n", + "region \n", + "Central [Madhya Pradesh, Chhattisgarh] 2 \n", + "Eastern [Odisha, Bihar, West Bengal, Jharkhand] 4 \n", + "NorthEastern [Arunachal Pradesh, Assam, Meghalaya, Manipur,... 8 \n", + "Northern [Rajasthan, Uttar Pradesh, Ladakh, Himachal Pr... 10 \n", + "Southern [Karnataka, Andhra Pradesh, Tamil Nadu, Telang... 6 \n", + "Western [Maharashtra, Gujarat, Goa, Dadra and Nagar Ha... 4 \n", + "\n", + " total_area average_area \n", + "region \n", + "Central 443541 221770.500000 \n", + "Eastern 418336 104584.000000 \n", + "NorthEastern 262179 32772.375000 \n", + "Northern 889881 88988.100000 \n", + "Southern 636251 106041.833333 \n", + "Western 508042 127010.500000 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "states_agg_df = states_df.groupby('region').agg(states=('state','unique'),\n", + " total_states=('state','count'),\n", + " total_area=('area','sum'),\n", + " average_area=('area','mean'))\n", + "\n", + "states_agg_df" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "states_agg_df.total_states.sort_values().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAIuCAYAAAC7EdIKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdd5hU9dk+8PuZusvuslSpwiAIM+BQFBCk2WOLXaMxJpqYxBhjNBhd+xpj1yS/vG/KG+uaJtEYk7ApJppQRIog0gYkCEiVvtTd2Znz/f0xQ7ISts/Mc8r9ua69WGZnz7mBZefec75FjDEgIiIicjOfdgAiIiKifGPhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi12PhISIiItdj4SEiIiLXY+EhIiIi1wtoByAiov8mImkASxs89LIx5rFWHuNUAEljzJxcZiNyIhYeIiJ7OmSMGdnOY5wKYD+AFhceEQkYY1LtPC+R7YgxRjsDEREdQUT2G2NKj/L4/QA+DaAYmSLzVWOMEZFbANwIIAVgBYAKAHMBpAFsB/ANACsB/BRAv+zhbjXGvC0ilQB6A4gA2AHgDQAXAugAYCCA3xlj7sjPn5SoMFh4iIhs6Ci3tB41xkwTkS7GmF3Z5/wcwG+MMX8Ukc0ABhhj6kSkkzFmT7bI7DfGPJV9/q8A/NgYM1tE+gH4qzEmln3epwFMNMYcEpHrANwPYBSAOgCrsh/bUJA/PFEe8JYWEZE9NXZL6zQRuQOZqy9dACwH8EcASwD8UkReB/B6I8c8E8BQETn8+44iUpZ9/w/GmEMNnvumMaYGAERkBYD+AFh4yLFYeIiIHEJEigD8GMBoY8yG7JWZouyHzwcwGZlbUfeJyLCjHMIHYPwRxQbZAnTgiOfWNXg/Db5ekMNxWjoRkXMcLjc7RKQUwOUAICI+AMcaY/4B4A4AnQCUAtgHoKzB578B4ObDvxGR9g6KJnIMNnYiInsqFpHFDX7/F2NMhYg8g8zYnnUAFmQ/5gfwCxEpByAAvp8dw/NHAK+KyEXIDFq+BcCPRGQJMt//ZyIz0JnI9ThomYiIiFyPt7SIiIjI9Vh4qFEiYkTk6Qa/vz07SLI1xzhVRE5p8PsXReTyozwvIiKHRGRxg7fPtyHzxSIytLWfR0RE7sbCQ02pA3CpiHRryyeLSACZlV5Paeaph60xxoxs8PZSG057MYBWFZ5sTiIicjF+o6
empAD8DMBtAO5p+AER6Q/geQDdkVnF9XpjzEci8iKAXcgsWLYLwAQAaRH5HDKDJgFgsoh8C0BPAHcYY15tKoSI/ATAGGRWln3VGPNA9vHHkJmCm0Jm9slr2d9PEZF7AVyWPcSPsjkPAviyMWblETkXicg+ZFafPS776w+MMT9s1d8WERHZFgsPNedHAJaIyBNHPP6/AF4yxlSJyBcB/BCZqysAMBjAmcaY9FFWev0SgF4AJgKIAvgDgMOFZ+ARs1K+YYyZBeAeY8wuEfEDeFNEhgPYCOASANHssvqHV5b9A4Dph0uUiLwJ4EZjzGoRORmZNUxObyRnFMBpyEzjXSUiPzHG1Lfvr4+IiOyAhYeaZIzZKyIvITOdteFiZeMBXJp9/+cAGhaiV4wx6SYO+7oxxgKwQkR6NHh8TSMry14pIl9B5uu1FzK3rFYAqAXwrIhUA5h+5Cdl1yk5BcArDVaWDTeRs9oYUwegTkS2AeiBTLEiIiKHY+GhlvgBgEUAXmjiOQ3XNzhyxdYjNVzBVRp9FgARGQDgdgBjjDG7s7eiiowxKREZC+AMAFchs5ja6Ud8ug/AniZ2nObKskREHsFBy9Ss7EaFvwHwpQYPz0GmaADANQBmN/LpR6702lodkSkmNdmrQecC/756U26M+ROAWwEcLjX/Pp8xZi+AtSJyRfZzRERGtCMLERE5FH+CpZZ6Gg2WpEfmFtfzIvJtZActN/J5R6702pQjx/A8b4z5oYi8h8wGiR8CeDv7sTIAv8/uLSTIDKwGgJcBPCMityCz7P41AH6SHcQczH78/Wb/tJQbleVFyGxyWZL9teFbw8eKkVktOH3EW6qRx+oA7Mm+7QawG5U1Da/QERF9AldaJqLWqSzvhMwMu57IjHNq+GvD97sjUzILpQ6Z8rPniF93ITMW66MGb5tRWdPUODMichkWHiL6hHhVPHjGgYP9f7BtRxTAwAZvxwGI4D8bWDpZGsBmfLIEfQRgPYBVAD5EZY2lF4+Ico2Fh8ij4lXxjgCGITMdv+HbcWHLWvfu+o2DNPMpOwQgAWAZMrdTM79W1qxXTUVEbcbCQ+QB8ap4EMAIACcDGJt9G4LGZskZU7tk3YawNDOLzoP2IbMkwuES9C6ABaisqVVNRUTNYuEhcqF4Vfx4/KfYnIzMLLZwk590hD9v2LSpbyrdJw/x3KYewGIA72Tf5qCy5iPdSER0JBYeIoeLV8U7I7OFx+GCMwZAl/Ye93sfb1901sFDJ7b3OB61GYfLT+bXRZxFRqSLhYfIYeJVcR8ypeac7NsYZKZ059T1e/bO/NbuPZNzfVyPqkOm+Pwt+7aQg6KJCouFh8gB4lXxngA+hUzBOQtA13yfc9yh2hnPbN02Jd/n8ahdAN5EZtPbP6OyZpNyHiLXY+EhsqHsIONT8J+rOCNQ4AHEPVOp+X/bsHlsIc/pYUsA/BnAn5AZA5RSzkPkOiw8RDYRr4p3R2YH+HOR2Reso2aekDEfLly34TjNDB61B5nNcKcBeAOVNUnlPESuwMJDpCheFS9FpuRcg8xGqPbZ7sWY5OJ1G/z+PIwPohbbA+D3yOxl9zdU1tQr5yFyLBYeogLL3q46F8BnAVyIzD5StjR9w+YN/VOpY7VzEIDMNhmvI1N+/s7bXkStw8JDVADxqrgAmIxMybkcOZg2XgiPb9ux8LwDB0/SzkH/ZScy5edlAG+isobfyImawcJDlEfxqvhIZErOVQAcd6XkczV7Z9y5aw9natnbOgDPAXgelTWblbMQ2RYLD1GOxaviXQFcn30bqhynXUYfqp3xAqemO0UaQDWAZ5CZ6s7d4IkaYOEhypF4VXwsgK8DuBLu2FEc3VOpd9/asHm0dg5qtY0AXgDwHDc8Jcpg4SFqh3hVvBjA1QC+BsB1xSBgzPr31m3or52D2sxCZnHDZwD8gQOdyctYeIjaIF4V7wPgZgBfgUMGILeJMan31m1AwE7T5amtPgLwAwDPoLJmv3YYok
Jj4SFqhXhVfDSA2wBcASCoHKcgXt+4ed3A+lREOwflzB4APwXwQ1TWbNEOQ1QoLDxEzchu1nkxMkVnonKcgnt4+44FF+4/OEY7B+VcEsAvATyFypoV2mGI8o2Fh6gR2aJzFYD7AQxRjqPm6r37Zty9czdnarmXQWYPrydRWTNDOwxRvrDwEB0hu0jgFQAegMOnlefCqNramS9t2TZZOwcVxHwAj6Gy5nfaQYhyjYWHKCtbdC4BUAkgrpvGPrql0gv/sWETV1v2loUA7kVlzV+0gxDlCgsPEYB4VfxCZIrOKOUotuM3ZuPidRv6aucgFbMA3IPKmlnaQYjai4WHPC1eFT8PwINw4Ro6OWOMtXDdhvoQENaOQmr+gkzxWaQdhKitWHjIk+JV8bORKTrjtLM4wSubtqyJJusHaucgVQbAawDu56wuciIWHvKUeFV8DIDvwYPTy9vjwe0751+6/8BY7RxkCxYy09nvR2XNOuUsRC3GwkOeEK+KdwfwKIAvAhDlOI5z+d59Mx7g1HT6pFoATwF4FJU1B7XDEDXHpx2AKJ/iVXF/vCr+DQAfAPgSWHbaZFUoxL83OlIRgHsBJFBZfqV2GKLmsPCQa8Wr4pMALALwQwCdlOM42sZgoEw7A9lWPwDTUFn+FirLT9AOQ9QY3tIi14lXxXsjc6n9au0sbuE3ZvPidRt6a+cg20sB+Aky43v2aIchaoiFh1wjXhUPIrPf1X0ASpXjuIsxZsH6jbVFxhRrRyFH2A7gbgDPo7LG0g5DBPCWFrlEdpr5UgCPg2Un90RkTTC4UTsGOUZ3AM8AmIfKcq7STbbAwkOOFq+K941XxX8H4K/w8AafhbA8HNqpnYEcZzSAuagsfwSV5Vy4klSx8JBjxavi1yJzVedi7SxesCwcqtXOQI4UAHAXgEWoLOdaTqQmoB2AqLXiVfFuAP4PwKXaWbxkVSjI7xfUHkMBzEFl+feQGdTMAk0FxSs85CjxqvhFAJaDZafgNgU4NZ3azQ/g2wAWo7L8FO0w5C2cpUWOEK+Kd0RmPZ0vaGfxKp8xH7+/bkMP7RzkGhaA/4fMpqSHtMOQ+/EKD9levCp+OjJjdVh2FFkiPQ6I7NfOQa7hQ2YZiSWoLOfedpR3vMJDthWvihcjM838ZnBLCFv4xeatq0bUJTkbjnItDeAhAA9x3R7KF17hIVuKV8XHAngPwDfAsmMby8KhXdoZyJX8ACoBvInKcq7oTXnBwkO2kt3s8yEAc8B1dWxnWThcp52BXO1UZAY0n6MdhNyHhYdsI14V7wngTWR2YPYrx6GjWB3k1HTKu+4A/oTK8idRWR7UDkPuwcJDthCvik9B5hbWFO0s1LjNgUBH7QzkCQLgdgCzUFkeUc5CLsFBy6QqXhUXZNbleAS8qmN7Ysz2Jes2dNfOQZ6yB8ANqKz5rXYQcjYWHlITr4qXA6gCcJF2Fmq5Oes27C0zhld6qNB+DOBWVNbUawchZ+ItLVKRiMaG/fqx1D967zQjtbNQ66wKhTZpZyBPugnAW6gs5+KX1CYsPFRwiWjscgBz/QajHn8+nQzVm4PamajlloVDu7UzkGdNBPAuKstP0g5CzsPCQwWTiMZ8iWjsEQCvACgFgHAKxz/2Qvo93WTUGsvDoaR2BvK0vgBmo7L8c9pByFlYeKggEtFYOYBqAHcd+bG+OzHh+jfSMwqfitpidSjIqcKkrWiD1e3mSEX145GKar6OUYvwC4XyLhGNHQvgbQCNLiZ2zkJzysg11pLCpaK22hoIdNLOQN52yIRWn5N8fBiAOwC8FqmoLtXORPbHwkN5lYjGRgCYC2BYU88TIFjxitWzy17zcWGSUVsdFOHS/6TGMrLznORjxQdQfLjkXARgdqSiup9mLrI/Fh7Km0Q0djaAWQBa9ALpMzjmqWfTH/vThtNObcyIdN7j83HgMhWcMaj/cv23Nqw3Pfse8aERAOZHKqrHaeQiZ2DhobxIRGPXITNmp6w1n1dah+
H3/To9Jy+hKGdWhYKbtTOQ9/w0/el33rROamwpix4A3opUVF9QyEzkHCw8lHOJaOwBAC8AaNO+S0M3YMqFc623c5uKcmlZOLxHOwN5ywJr8MzHU1dPbuZpxQB+F6movrYQmchZWHgoZxLRWCARjT0HoLK9x7rmH9aogZvNB+1PRfmwLBzibUcqmJ2m7L2rkved0sKnBwBURSqqb81nJnIeFh7KiUQ0VgZgOoAv5uJ4AnR46OfpcMkhU5OL41FurQkGQ9oZyBvqjX/9GXVPRdLwt+aKsQD4fqSi+uF85SLnYeGhdktEYz0AzADwqVweN2Ch/1PPpT8AN3yznY8D/s7aGcj9jMG+i5IPpfagrK1fb3dHKqp/yrV6CGDhoXZKRGO9kSk7o/Jx/K77MOabv7e4KKHNHBTpo52B3M0YWN9OfTWxwkQGtvNQXwXwcqSimlclPY6Fh9osEY31RabsDMnneU5JmMmTllnv5vMc1EoiHXf4fdu1Y5B7vZKePPPV9JSxOTrcFQCquUCht7HwUJskorH+yJSdQfk+lwC+m/9oDeq103yU73NRy60MhbZoZyB3+sDq8/YdqRtPzfFhz0Rm2nrXHB+XHIKFh1otEY0NQKbsHFeocwrQ6fEX0odC9eZQoc5JTVsWDu3VzkDus88UL/908uF87YY+BsA/IxXV3fJ0fLIxFh5qlUQ0NhCZstO/0OcuqseQR6rSiwp9Xjq65aFQSjsDuUvayNaz657oWodQUR5PcwKAv0Uqqjnw3mNYeKjFEtHYYGTKzrFaGfptx4TP/z09U+v89B8fhoJh7QzkHsag9rPJe3ZtQdeeBTjdSABvRCqqywtwLrIJFh5qkUQ0FgPwTwDqs3POX2DGD//QWqqdw+u2+f1dtDOQezya+uzCeWbo0AKecjSAP3Mgs3ew8FCzEtHYMGTKTi/lKAAyO6vf9Rure6f9hrOEFNVyajrlyFvpkTN+lr5ggsKpxwP4U6SiuoPCuanAWHioSYlobBCAtwAco52lIb9Bz6efSW/2WYbjSLSIlH7s93+sHYOcbbPpMv9L9bdPUowwCcAfIxXVxYoZqABYeKhRiWisF4A3YLOyc1hZLUbc+zI3GdW0MhTcqp2BnKvWBNecVfdkzMCn/Vp0OjKbjnJcmotpf5GRTSWisXIAfwEwQDtLU05Yb6acP9+ao53Dq5aGw5yaTm1iGew+L/lo4ACKy7SzZH0KwKuRiuqgdhDKDxYe+i+JaKwIwB8BDNfO0hKff9MacdwWs1o7hxctD4cs7QzkPMYg9bX6W9d9aHoXfHmLZlwAYFqkotqvHYRyj4WHPiERjfkBvIzMfW1HEKDkuy+lQx1qubN6oa0Ncmo6td5z6fPm/NUam5f993LgEgA/1A5BucfCQ0f6PwAXaYdorezO6qu4s3ph7fD7uEw/tcp71sCZ3019brJ2jmbcFKmovl07BOUWCw/9WyIaewTAl7RztFW3vRj7jT9YXJSwgOpE+hqAJZNaZLcpXXxF8oHx2jla6IlIRfUV2iEod1h4CACQiMZuBXCXdo72mrjCTJqwnDurF4xI8eaAn5uIUrPqjX/DGXVPHptCwCmDggXAS5GK6lO0g1BusPAQEtHYNQC+p50jFwTw3fIHa2CPXWajdhavSIRCXIuHmmQM9l+WrKzdhXKn3QItAvCHSEX18dpBqP1YeDwuEY1NAPA8Mj/NuIIAnZ98Pr0/mDK12lm8YGk4tE87A9mXMTB3p760fIkZ6NTS0BWZLSi4w7rDsfB4WCIaOxbAawBC2llyrage0Yer0ry1VQCJcIhjeKhRr1sTZvw6fcbJ2jnaaSAyV3ryuYs75RkLj0clorFiAK/Dpqso50JkGyZe8w/urJ5v64JBLslPR7XG6jXntvqvn6qdI0fGA/hFpKKar5sOxX8473oewInaIfLtwrlmXHyttUw7h5vt9PmdNi6DCuCAKUqcn3zErmvttNVlAJ7QDkFtw8LjQYlorALAVdo5Ck
GA0N3TrK7l3Fk9b5KCvhbAFZfp39JGtp1d93inWoTdePVvaqSi+rPaIaj1WHg8JhGNnQ/gYe0cheQ36PX0s+lNPsuktbO4kkh4QyCwWTsG2YMxqPt8/V3bNqF7L+0sefSzSEX1CdohqHVYeDwkEY1FAfwKHvx373gII++aZs3WzuFWK8Kcmk4ZT6WuXPC2dYLby0AJgNciFdUdtYNQy3nuhc+rEtFYJwC/B+DZ/6Aj1pkp5y6w3tHO4UbLwqED2hlI38x0fMaP0hdP1M5RIMcDqIpUVLtmSQ+3Y+HxgEQ05gPwawCDtbNou+7vVjyy1azRzuE2iRCnpnvdVtP53S/U3+mYTYdz5GIAd2iHoJZh4fGGhwCcox3CDgQoffiltK+41uzVzuIm64OBDtoZSE+dCXx4Vt0Txxv4vPia8nCkovp07RDUPC9+cXpKIho7A0CFdg47CaYx4Mnn0wnurJ47u/3+7toZSIdlsOeC5CO+fSgp186ixA/g15GK6r7aQahpLDwulojGugH4Ofjv/F+OqcHJX59uzdDO4Rb1QN8UkNLOQYVlDNLfqL9lzWrTN6KdRdkxAF6JVFS7btV6N+ELoUslojEB8CIAN08NbZfJy8zk8QlroXYOVxAJfBQMbNKOQYVVlT777Wpr3EnaOWxiHIDva4egxrHwuNc3AZyvHcLOBPB983VrQI/d3Fk9F1aEQlzc0UOWWpFZlanrJmvnsJmbIhXVl2uHoKNj4XGhRDQ2CsDj2jmcwAd0eeL59N5AytRpZ3G6peEwp6Z7xB5TsuTS5HecviFovvw0UlHNK+s2xMLjMolorASZKei8l9xCxUkM/e7P0/O1czjdynBQOwIVQMr4Np1Z92SvegT4PebouiKzVyHZDAuP+/wPgCHaIZzmuK2YdNWM9CztHE72USBYqp2B8ssYHLg8Wbl/BzpxVl7TzolUVH9NOwR9EguPiySisasBXK+dw6kumWPGDl1vVmjncKo9ft8x2hkof4yBuS91/dLFZhB/oGqZpyIV1cdrh6D/YOFxiUQ0NgDAT7VzOJkA4ft+ne5UfsDs0M7iRCmgdxJIaueg/Ki2Tp7xi/RZ47RzOEgHAL+IVFQHtINQBguPCzSYgu7ZfbJyxW/Q+6ln0xu4s3obiPjXBoOc8eZC66we79xcf8sU7RwONBbAPdohKIOFxx2+CoDTQ3Ok/CBG3fmKxfE8bbAiHOLVMZc5aMKrzk0+NgIQbpLZNvdGKqrHaIcgFh7HS0RjfcAp6Dk36kNz6tkLrbnaOZxmaTh0SDsD5Y5lZPunko+XHkKYe6W1XQDAzyMV1cXaQbyOhcf5fgLeysqLL71hDev/sflQO4eTrAqFeBXAJYxB8rr6O7ZsMMf00c7iAkMAPKkdwutYeBwsEY1dBeDT2jncSoCyR6rSKK4z+7SzOMXGYIBT013i/6UvnTfTGjFcO4eL3BSpqJ6oHcLLWHgcKhGNdQXwQ+0cbhdM47jHn08v187hFDU+Xw/tDNR+76SHzvxB6vJJ2jlcRgD8hLO29LDwONf3AXDxrwLouQfjbqxO/1M7hxOkgd51glrtHNR220z5wmvq756gncOlTgDwLe0QXsXC40CJaOwcANdq5/CS05aYSWNXWe9p57A9EVnDqemOlTSBdWfWPTnIgs+vncXF7o9UVPfTDuFFLDwOk4jGSgH8n3YOrxHA/63XrGO77zGbtbPY3fJwaKd2Bmo9Y1Dz6eR3zV6UlmtncbkSZLYAogJj4XGeRwDwpwMFPqDbU8+l93Bn9aYtDYd5S8thjEH61vqvr15l+g3QzuIRF0Yqqi/UDuE1LDwOkojGTgTwde0cXlacxNDv/II7qzdlVSjI7ysO86v06bN/b00YrZ3DY/4nUlFdoh3CS/iNyVm+D/6bqRu0BZOumMWd1RuzKRAo085ALZew+s2+J3UDt40ovH4A7tcO4SV88XSIRDR2Obh9hG1cPtuMiW4wCe0cdrTP5+upnYFaZq/psP
Si5EPc9kDPtyIV1Sdoh/AKFh4HSERjYQBPaOeg/xCg6IFfpcvKDppd2lnsxhLpeVDkgHYOalrK+LacVfdEjySCYe0sHhZAZm0erlBeACw8znAbAA4mtBm/hb5PP5teK8ZY2lns5l8hTk23M2Nw8KrkfXs+RpdjtLMQJgL4rHYIL2DhsblENNYDwN3aOejoOh3ASd9+1ZqpncNuloVCvPJlY99JXfv+u2ZITDsH/dt3IxXVvNKWZyw89vcwAA4CtbGT/mWmnPGeNU87h50sC4c4dd+m/poe/c8X0ueO185BnxABcFOhTyoiPUXkZRFZIyIrRORPIjK4Dce5TkR6t+HzKkXk9tZ+Xlux8NhYIhobCeB67RzUNAHkK3+xYsduM2u1s9jF6lCI+wXZ0Aar27yv1t/GGVn2dE+korpgiz6KiAD4HYB/GmMGGmOGInM3oS374V0H4KiFR0Rss2o3C4+9cRq6QwjQ8dEX01ZR0uzXzmIHmwP+jtoZ6JMOmdDqc5KPDwOEA2TtqSuAOwt4vtMA1Btjfnr4AWPMYmPMLBH5togsEJElIvIgAIhIREQSIvKMiCwXkTdEpFhELgcwGsAvRWRx9rF1InK/iMwGcIWIfDl7vPdF5Lci0qGAf85/44upTSWisUsBnKqdg1oulMbAx59PL9XOYQf7fL5e2hnoPywjO85JPlZ8AMWl2lmoSbdGKqr7FOhcJwBYeOSDInI2gOMBjAUwEsBJInJ4SZTjAfzIGDMMwB4AlxljXgXwLoBrjDEjjTGHss+tNcZMNMa8DOA1Y8wYY8wIAAkAX8rrn6wRLDw2lIjGguA0dEfqtRvjv/zn9AztHNqMSPd9Inu1cxBgDOpvqJ+6ab3p2Vc7CzWrGMCDyhnOzr69B2ARgCgyRQcA1hpjFmffX4jM2KPGTGvw/gkiMktElgK4BsCwnCZuIRYee/oigIHaIahtzlxsJo7+wFrc/DPd7YNQaJN2BgJ+nL5w7lvWiSO0c1CLXRepqC7EDLrlAE46yuMC4NHs1ZqRxphBxpjnsh9rOBkhjcw6Qo1puBbXiwBuNsbEkSl0RW2P3XYsPDaTXWTwXu0c1HYC+G9/zerTrcZs0c6iaXk4tFs7g9ctsIbMfDJ11STtHNQqfgCPFeA8bwEIi8iXDz8gImMA7AXwRREpzT7WR0SaW69pH5qeTVwGYIuIBJG5wqOChcd+vgKAl54dzmfQ/aln07sCaZPUzqJlWTjk2T+7Hew0Ze9dlbz3FO0c1CYXRiqqJ+bzBMYYA+ASAGdlp6UvB1AJ4FfZt3eyt6BeRfNLo7wI4KeHBy0f5eP3AZgH4G8AVubmT9B6kvkzkx0korFiAGsAcMCnS3zQGzPv/ULAk3ugDUom3/7dpq0TtHN4Ub3xrx9T9+OOe1DWWTsLtdmcdY+dz/8/OcQrPPbyNbDsuMrgzZh82WxrtnYODVsDgYKtKUL/YQz2XpR8KMWy43inRCqqz9YO4SYsPDaRvbpzh3YOyr0rZ1mjB280apdxtRxow8qr1D7GwLq9/sZVK0yEkx7cgarjIWEAACAASURBVOM5c4iFxz6+jLatcEk2J0BR5S/TpaUHjacG8RqRLjU+3x7tHF7ySnrKrN9ak8do56CcmRSpqOag8xxh4bGB7MwsXt1xsUBmZ/U1XttZfVUoyKnpBbLK6vv2HamvctsI9+FVnhxh4bGH6wEUanVNUtL5AEZPfc1bO6svC4d4hacA9pni5Rcmv3u0NVXI+c6OVFTzql0OsPAoS0RjARR2/xRSNOYDM+W096352jkKZVk4XK+dwe3SRraeXfdE1zqEVBZzo4K4WzuAG7Dw6Pssml6em1xEALnxT9aQvtvNOu0shbAmGAxqZ3AzY1D72eQ9u7aga0/tLJRXF0Yqqodoh3A6Fh5939QOQIUlQPmjL6brw0lzoPlnO9vWgJ9To/Po0dRnF84zQ4dq56C88wG4XTuE07HwKEpEYxMBnKidgwovnMLxj72Qfl
87R74dFOHYtDx5Mz3qnz9LX8CF6bzj2khFNa/ktQMLjy5e3fGwPrtwyhf/6vKd1UXKd/p8O7RjuM0m03X+DfVTPbmCt4eFwdeMdmHhUZKIxo4FcLF2DtL1qUVmwomrLVdf6VkZDnl6E9VcqzXBf51d90TMwMfv395zY6SiulQ7hFPxP4yemwAEtEOQLgECd/zW6t11r9mqnSVfloVDNdoZ3MIysuvc5KOhAyhubjNHcqdOUNxt3OlYeBRkt5H4snYOsofszurb/Wnjyincy0OhlHYGNzAGqRvrb12/1vTup52FVN2oHcCpWHh0XAOgq3YIso+SOsQf+FX6He0c+fBhKBjWzuAGz6TPm/OGNWaUdg5SNzJSUX2ydggnYuHRcYt2ALKf6EZMvniO9bZ2jlzb5ufU9PZaZA2a+UjqcxykTIfxKk8bsPAUWCIaOw1AXDsH2dPVM6wTj99kVmnnyKVDIn21MzjZblO6+Mrk/eO1c5CtfCZSUd1JO4TTsPAUHq/uUKMEKH7wF+ni0kPGPXtQiZRu8/u3acdwonrj33B63VP9UghwxWpqqBjAF7RDOA0LTwFlp6JfqJ2D7C1god9Tz6b/BWOMdpZcWRkKcmp6KxmD/ZcmH6zdjY5dtLOQLX1VO4DTsPAU1rXg3zm1QJf9GH3b65ZrFiVcGg7v087gJMbA3JW6YflSc9zx2lnItmKRiuop2iGchC++hfV57QDkHONWmilTlloLtHPkwvIwp6a3xuvWhJkvp0/nTBxqDgcvtwILT4EkorGTAXC3W2oxAeSm6dbg3jvNeu0s7bU2GCzSzuAUa6xec26r/zp/cqeWuDRSUd1dO4RTsPAUDq/uUKsJUP748+m6UL05qJ2lPbb7fVx3qgX2m6IV5ycf4Vo71FIhANdrh3AKFp4CSERjIQCf0c5BzhROYfCjL6bf087RHnUifQ3gmkHY+ZA2su3suic61yJcrJ2FHOVz2gGcgoWnMM4HV1amdjh2ByZc9zcH76wuUrwl4HftfmHtZQzqrq2/a/tmdOulnYUcJx6pqB6qHcIJWHgKg7ezqN3OfdecMmKNtUQ7R1slQiEWnkY8lbpywRzrhGHaOcixrtIO4AQsPHmWiMa6AjhPOwc5nwDBu16xenTZZxy5iN+ycGi/dgY7mpmOz/hR+uKJ2jnI0ThkogVYePLvKmQGlhG1m8+gx1PPprc6cWf1FaGQpZ3Bbraazu9+of7OSdo5yPEGRyqqT9QOYXcsPPnH21mUU6W1GH7vr9NztHO0Fqemf1KdCXx4Vt0Txxv4+H2YcoG3tZrB/2h5lIjGBgMYq52D3GfYBky5cK6zdlbf6fd3085gF5bBnvOSj/r3oaRcOwu5xpWRimrRDmFnLDz5dal2AHKva/5hjRq42XygnaOlkoK+FuD521rGIH1z/Tc/XGP69NfOQq7SH8B47RB2xsKTX9wolPJGgA4P/TwdLjlkarSztIhIeGMgsFk7hrYX05+a/SfrZI63oHzgba0msPDkSSIa6wGAe+FQXgUs9H/qufQHTtlZfUU49LF2Bk1LrAGzHkx9gdtGUL5cEamo5ut6I/gXkz8XgH+/VABd92HMN3/vjJ3Vl4VDB7QzaNljSpZclnyQPwRRPvUEcKp2CLviC3L+8HYWFcwpCTN50jLrXe0czUmEQo64EpVrKePbeEbdU73rEeASFZRvl2gHsCsWnjxIRGPFAM7SzkHeIYDv5j9ag3ruMhu0szRlfTDQQTtDoRmDA5cnKw/sRDlnqVEhnKsdwK5YePLjLADcAJAKSoBOTzyfPhiqN4e0szRml8emphsDc1/q+qWLzaAh2lnIMwZGKqoHaYewIxae/ODtLFJRVI8hD1elF2rnaEw90DcNpLVzFEq1NW7mL9JnjdPOQZ5zjnYAO2LhybFENOZDZsAykYr+2zHx2jfTM7VzHJVI8KNgYKN2jEJYZ/V45+b6b0zWzkGexNtaR8HCk3snA+ihHYK87YL5Znx8rb
VUO8fRrAiFtmtnyLeDJrzy3ORjIwDhyrek4dRIRXVYO4TdsPDkHm9nkToBgndPs7p32m9sVy6WunxqumVk+9nJJzoeQthzA7TJNjoA4HpPR2DhyT1eSiRb8Bv0fPqZ9GafZVLaWRpaGXLvzGxjkPxC/Z1bN5ruvbWzkOdxHM8RWHhyKBGNdQYQ185BdFhZLUbc87K9NhldHwyWamfIlx+kLps/yxrO7wFkByw8R2Dhya1J4N8p2Ux8vZly/nxrjnaOw/b4fd21M+TDnPTQGf8vfdlE7RxEWbFIRTU3qG2AL865xXumZEuff9MacdwWs1o7BwCkgD71QL12jlzaZsoXfq7+bpYdshte5WmAhSe3WHjIlgQoeeildLBDrQ12Vhfxrw0GXTM1PWkCa8+se3KQBZ9fOwvRET6lHcBOWHhyJBGNdQQwUjsHUWOCFiJPPpdeZYed1VeE3TE13RjUfDr5XexFabl2FqKj4FXHBlh4cmciAP6ER7bWfS/GfuOP+jurLwuHbLv9RUsZg/St9V9fvcr0G6CdhagR3bnNxH+w8OQOb2eRI0xcbiZPWK67s/rKUMjxC/L9Mn3G7N9bE0Zr5yBqxnjtAHbBwpM7p2oHIGoJAXy3/MEa2GOXURtHsyEYcPTU9BVWv9n3pr7EH3LICVh4slh4ciARjZUCOFE7B1FLCdD5yefT+4IpU6tx/hqf7xiN8+bCXtNh6cXJh8Zo5yBqoVO0A9gFC09uTAAQ0A5B1BpF9Yh996W0yq2tNNA7CdRpnLs9Usa3+cy6J3smEeQ+ReQUJ0Qqqh19RTVXWHhygzsikyMN+BgTP/sPhZ3VRXxrQs6amm4MDl6ZvH/vNnR25cKJ5Fp+AGO1Q9gBC09u8IuJHOuiuWbcCeus5YU+7/JwaGehz9ke30ld+/4iMziqnYOoDXhbCyw8ucL1d8ixBAjd87LVpbzAO6svDYcPFvJ87fGX9OgZL6TP5eBPcip+7YKFp90S0VhfAN20cxC1h9+g19PPpjf6LJMu1DlXhYKOWLfqI6v73Bvrb+Nta3KycZGKascvBdFeLDztx6s75AodD2HUXb+xZhfqfJsCgbJCnautDpnQB+ckH48D4vkXC3K0LgCGaIfQxsLTfiw85Boj1pop57xrvVOIc+31+XoU4jxtZRnZ8ank4x0OoqhEOwtRDnh+rCkLT/uN0g5AlEvX/82KR7aaNfk+jwX0PCRiy3E8xqD+hvqpmz4yPfpqZyHKkWHaAbSx8LQfr/CQqwhQ+vBLaV9xrdmb3xOJ/Mumu6b/KH3R3LesE0do5yDKoaHaAbSx8LRDdod0bhxIrhNMY8ATz6dX5Htn9eXh0K58Hr8t5ltDZjyV+swk7RxEOcYrPNoBHG4EAA5mJFfqUYNxN1Xnd2f1peGQytYWjdlhOi66OnnvBO0cRHkQiVRUe3o8GgtP+/B2FrnalKVm0riEtShfx18dCtlmanrS+NefWffkgDT83CaG3EgAxLRDaGLhaR8OWCZXE8B/6+tW/2P2mE35OP6mgL9jPo7bWsZg70XJ76b2oKyzdhaiPPL0OB4WnvYZrh2AKN98QNcnn0vXBFIm55t97vf5euf6mK1lDKzb629clTD9B2pnIcozT4/jYeFpn+O1AxAVQnESQ7/78/T8XB/XEum+X2Rfro/bGr9JT5n5W2vyGM0MRAXCwkOtl4jGugGwxeV4okI4bismXTUjPSvXx/1Acdf0VVbft+9MffVUrfMTFRgLD7UJp6OT51wyx4wdut6syOUxl4fDu3N5vJbaZ4qXfzr58GiNcxMp6e/lmVosPG13nHYAokITIHzfr9PlHQ+Ynbk65rJwKJmrY7VU2vi2nFX3ZLckguFCn5tIkadnarHwtB0LD3mS36DP08+m1+dqZ/XVoWBBp4Ebg9qrk/fs3ooutt7LiyhPPDv2lIWn7XhLizyr/CBOvONVKyfjebb4A+W5OE5LPZK6ZtF8E/P09FzytGO1A2hh4Wk7Xu
EhTztxjTn1rEXW3PYe54BPCjY1/e/pUf98Jn3+KYU6H5ENeXZDXBaetmPhIc+74a/WsH7bzIftOYYR6Vrjk5pcZWrMJtN1/g31t0/J93mIbI5XeKjlEtFYAB7+oiE6TICyR15Mm6Kk2d+e43wQCuVlJefDak1w9dl1T8QA4d535HW8wkOtciwA7rdDBCCUxsAnnksvbc8xloVDeZuabhnZdW7y0fABFJfl6xxEDsLCQ63C21lEDfTcg/Ff/VP6n239/OWhUH0O4/ybMUjdWH/r+rWmd798HJ/IgXpEKqqD2iE0tPgqhYicgMzGY0WHHzPGvJSPUA4Q0Q5AZDenv28mvTfQem/+EF+rN9VdHQqF8pHpmfT5c96wxkzOx7GJHEoA9AGwTjlHwbXoCo+IPADgf7JvpwF4AsCFecxld8doByCyGwH833rNOrb7HrO5tZ/7ccDfKdd5FlmDZj6SuoZlh+i/eXIMaktvaV0O4AwAW40x1wMYAcDLK5R21Q5AZEc+oNuTz6V3t3Zn9QMifXKZY5cpW3xF8gFOPyc6Ok+O42lp4TlkjLEApESkI4Bt8PY4lm7aAYjsqkMSw77zi1burC5Svsvny8l2FfXGv+GMuif7peHnxAKio+MVnia8KyKdADwDYCGARQBa9w3NXXiFh6gJg7Zg0hWzWrez+spQqNW3wo5kDPZdmnywdjc6dmnvsYhcjFd4GmOMuckYs8cY81MAZwH4QvbWllfxCg9RMy6fbcZEN5hES5+/LBxq1+KDxsDcmfpyYqk5zrN7BRG1kCfHobZ00LKIyOdE5H5jzDoAe0RkbH6j2Rqv8BA1Q4CiB36VLis7aHa15PnLw6FUe873O2vijN+kT/Py9yWiluqoHUBDS29p/RjAeABXZ3+/D8CP8pLIGVh4iFrAb6Hv08+m10pmDGCT1oSCbZ6a/i+r95xv1d90als/n8hjWHiacLIx5usAagHAGLMbQF7WzbC7RDTmB5DzKbREbtXpAE66/bfWzOaet83v79yW4+83RSsuSD7c6rV/iDyMhacJ9SLiB2AAQES6A2j2JzaX6gyuUE3UKqNXmymnL7bmNfWcQyKtHkiZNvLx2XVPdK5FuLjt6Yg8h4WnCT8E8DsAx4jIwwBmA3gkb6nsjQOWiVpJAPnqn61Y3+1mbeNPkrLtft/2lh7TGNRdW3/Xjs3o1isnIYm8g4WnMcaYXwK4A8CjALYAuNgY80o+g9kYx+8QtYEAHR97MZ0OJ82Bxp6TCIW2tPR4T6Y+8+4c64RhuUlH5Cme3Ei32cIjIj4RWWaMWWmM+ZEx5n+NaflUUxfi+h5EbRRKYdDjz6eXNPbxpeFwi6amz0gP/+eP0xdNyF0yIk8JRCqqO2iHKLRmC092heX3RaSfiBgRefrwx0TkdhGpbM0JReRUETmlwe9fFJHLW3MMZZ77IiHKpd67Mf6Gv6RnHO1jK8KhdHOfv9V0XnBd/R3cI4uofTx3W6ulY3h6AViOzEDlG0XkLyLyh9aeTEQCAE4FkJM9brLrAxV6ALGX9xAjyomz3jMTTlptLT7y8Q+DgaKmPq/OBNecWffkEAMfJw4QtQ8LTyMeBHABgDoAvwSwHcDTDZ8gIv1F5E0RWZL9tV/28RdF5Hsi8g8A0wDcCOA2EVksIpOynz5ZROaIyIcNr/aIyLdFZEH2mA9mH4uISEJEfozMFhfHish+EXlYRN4Xkbki0qPtfyXNavIbMhE1T4DAt39r9elaYz4xZmeH39/oLWPLYM95yUcC+9HBc9+oifKgXDtAobV00PIMY8wMZK7w3A5gEoAjfzr7XwAvGWOGI1OKftjgY4MBnGmMuQzATwF83xgz0hhzeK+dXgAmIlOqHgMAETkbwPEAxgIYCeAkETl8GXtI9lyjjDHrAZQAmGuMGQFgJoAvt/QvoA14hYcoB3wG3Z96Lr0zkDbJw4/VivQ12eUvGjIGqZvrv/nhGtOnf2FTErmW535waOnWEuNEZAEyxW
IHgH4APj7iaeMB/Cr7/s+RKTCHvWKMaere/OvGGMsYswLA4aszZ2ff3kPmSk4UmQIEAOuNMXMbfH4SwPTs+wsBRFry52ojXuEhypGSOpzwwC/T//m/LNJhq9+/9cjnvZA+5+0/WSefWNBwRO7mudeylt7S+l9ktpWwABQDuAWZVZdLmvichj+lNToNNauuwfvS4NdHs1eCRhpjBhljnmvkePXGmMPnSwMINHO+9uAVHqIcGrIJky9925p9+PeJcOgTP0y9bx036zupz08pfDIiV/PcOLgW/4GNMf/K/po2xvwvMqXjSw2eMgfAVdn3r0FmccKj2YeWrQHwVwBfFJFSABCRPiJihx1e81mmiDzpMzOt0YM3mpUAsCwc2nf48T2m5P3LkpXj9JIRuZZfO0ChtbTwHBSREABLRJ4QkdsA7MEnVx2+BcD1IrIEwLUAvtnIsf4I4JIjBi3/F2PMG8jcIntHRJYCeBX2WCzJc62YKN8EKKr8Zbq09KDZvSKUmZqeMr6NZ9Q91SeFQFA7H5ELee61TP5zJ6iJJ4n0R2bMTgjAbciM7v7x4as+XpKIxh4GcLd2DiI32l2Cdx/6Kur/tGnz8EuS39m42Awaop2JyKWuXPfY+Z7aMaGls7TWG2NqjTF7jTEPGmO+1bDsiMhv8xfRdjx3GZCoUDofwOivv2b23pe6finLDlFeee4KT67GoxyXo+M4AQsPUR7tL/lKqPOa2SXfMD/RjkLkcudrByioXDW85u+LuYfnWjFRoaztf+7snd1HnhYqvaIIQL12HiIXa3YbF7fhi3fr1TX/FCJqre1dhy9eGzn/ZADwBXoc7wsOmKOdicjFWHjaSJp/imsc0g5A5Db7O/Rau/SEr/SHyL9nZAVLLhgL+D7SzEXkYiw8bXRnjo7jBCw8RDmUDJbsWjC6wgeRzg0fFwkWB0vO2a6Vi8jlLO0AhdbkoOXs+jdHG58jAEx236zDa+Z4BQsPUY5Y4k/OHfvABuMLjDjax/2h6EmpQ3PfNtauCYXORuRynrvC09wsrQsKksJZWHiIcmTBSRULUsGSJstMqOyKwXU1/7cHQKcCxSLyAhaehrI7kdMnsfAQ5cDy2HUzDpT2bnaPLPGVdPcXnTwrXTuv0ZXZiajVDmoHKLRW7ZYuIvtFJCkiaRHZm+9wNsXCQ9ROH/U9fc7Hx4ye3NLnB4pOmQgpej+fmYg8Zrd2gEJr7W7pq5HZLf0GAP+Tr1A2x8JD1A67OkeX/mvgpSdCpMWzO0VEQqWXlwJI5jEakZfs0Q5QaK3dLd2f3S39BQCn5S+WrbHwELXRweLuGxYP/3pPiBS19nN9gWMG+oKD3slHLiIPYuFpxOHd0hc32C29JI+57IyFh6gN6v3FNfNH35OE+Lq39RjBkvNOBnzrchiLyIvqp06bfkA7RKG1tPBcm33uzQAOADgWwKX5CmVzLDxErWSJLzXv5Pv/ZfmDA9tzHJFAUbDkfM+NPSDKMc9d3QFaXnguPnK3dHh3yrrnRrYTtdfCUVPfSYY6npSLY/lDx48Sf7fZuTgWkUex8DThC0d57Loc5nCSHdoBiJxk5eCrZ+zrGMnplPJQ6eUxALtyeUwiD2HhOZKIXC0ifwQwQET+0ODtnwB2FiShzcRWJvYCqNXOQeQEm3pNmLe514Scr58jvg5dA0XjV+T6uEQe4cnC09xKy3MAbAHQDcDTDR7fB2BJvkI5wMcA+muHILKzPR2PS6wafHUcIrnas+8TAsXjJ6bq3lsMUzsyH8cncjFPjoNr8huRMWa9MeafxpjxAFYCKMu+bTTGpAoR0KY+1g5AZGeHwl22LBp1W2eIdMjneUJlV5YDqMvnOYhcyJNXeFq60vIVAOYDuALAlQDmicjl+Qxmc1u1AxDZVcof3j9v7L17Ib6e+T6Xz99tgC84eG6+z0PkMp4sPM3d0jrsXgBjjDHbAEBEugP4O4BX8xXM5niFh+goDCQ9b8x9Kyx/eGyhzhksOXd83Z
41HwLp4wp1TiKH82Thaem9dd/hspO1sxWf60YsPERH8d7IW96uK+pcsLIDACL+ULDk0/sAmEKel8jBPDnDsaVXeP4sIn8F8Ovs7z8D4E/5ieQIvKVFdITVAy+duafT4BZvCJpL/tBxI1K1x8wy6W3cUZ2oeRu0A2ho6VUaA+D/AAwHMALAz/KWyBl4hYeoga09xry7oe/pEzQzhEovOwFcJ4uoJdZqB9DQ0sJzljHmNWPMt4wxtxljfgfg3HwGszkWHqKsvWX9Vq+IfmEIRPyaOcRX3DlQPHGVZgYih1inHUBDcwsPfk1ElgIYIiJLGrythbfX4eEtLSIAdaGO2xeOur0DRMq0swBAoGjsBEiHRdo5iGzs46nTpntyT8jmxvD8CsCfATwKoKLB4/uMMZ4c9JTFwkOel/YFD80de/824/MP087SUKjsyq7JvS/WAijSzkJkQ568nQU0v/BgjTFmnTHm6uwihIffvFx2EFuZ2AeOFSAPM4CZP+ae99OBYluVHQDw+bv094ViXJuH6OjWaQfQ4uWp5e21WjsAkZYl8a/NPFTcfZx2jsYEO5w9AfD/SzsHkQ3xCg+12gfaAYg0fBg5b9bOridM0c7RFBF/MFh64UFwbR6iI63TDqCFhaftWHjIc7Z3G/7euv7n2fbKTkP+4IDh4u85WzsHkc3wCg+1Gm9pkafsL+m9dumwrwyASFA7S0uFSi8dDsi25p9J5BksPNRqvMJDnpEMluxacNKdPoh00s7SGuIrKg8UT16jnYPIJiwAH2mH0MLC03arwfEB5AGW+JNzxz6w0fgC/bWztEWg6KTxkJJ3tXMQ2cDmqdOmJ7VDaGHhaaPYysRBAJu1cxDl2/zRdy1IBUuGa+doj1DZlT0BHNTOQaRsnXYATSw87cPbWuRqy4Z+ccbBkl6qe2Tlgs/fua8/NGy+dg4iZZ4ee8rC0z6e/uIhd1t/7JlvbzvmJFtPP2+NQIezJgIB/pBCXvaedgBNLDztw2+e5Eo7O8eWrjnu4tHaOXJJxBcIll5cj8zATSIv8vQ+cyw87bNSOwBRrh0sPuaj94ff1BsiYe0sueYP9hsm/t5cm4e8yALwvnYITSw87ePpy4PkPvWB4pr5Y+6uh/i6amfJl1DZJSMB4QbA5DX/mjpt+n7tEJpYeNohtjKxGcAW7RxEuWCJr37u2AfWWL7gQO0s+SQS7hgoPm2ddg6iAvP8D+gsPO3H9T3IFRaeePvc+lDZido5CiFQNHIcpJSztshLWHi0A7jAAu0ARO2VGHLNjH1l/Sdp5yikUNln+gA4oJ2DqEBYeLQDuACv8JCjbeo1ce6WnuMna+coNJ+/vI8/NJz/f8krPD1DC2DhyQV+wyTH2l0+aMWqwVcNh4hoZ9EQ6HD6RCDI2ZbkdhunTpu+QzuENhaedoqtTGyHhzdjI+c6VNRl83sjv9kVIh20s2gR8flDpZdYANLaWYjyyPO3swAWnlzhVR5ylJQ/vG/emPv2Q3w9tLNo8wX7DpVA37e1cxDlEQsPWHhyhQOXyTEMJD1v7P0rLX9osHYWuwiVXjQKEC4xQW7FwgMWnlzhFR5yjEUjb327LtxpjHYOOxEJlwU6nMlb0+RWnh+wDLDw5AoLDznCB4Mum1nTaZDnZmS1RCAcP1l8Hedp5yDKsXVTp01nmQcLT07EVib2AFilnYOoKVt6jF2wsc9pE7Rz2Fmo9Mp+APZp5yDKoX9oB7ALFp7ceUs7AFFj9pb1/yAR/XwUIn7tLHYm/o69/OFRHO9AbsLXpiwWntz5u3YAoqOpC5Vve/fEqaUQKdPO4gSB4lMnAsEV2jmIcoSFJ4uFJ3f+AcDSDkHUUNoXOjh37P07IP7e2lmcQkR8obLL/ODaPOR8q6ZOm75ZO4RdsPDkSGxlYjc49Y9sxABm3ph7lqQDRUO1sziNL9B7iC/Qb7Z2DqJ24tWdBlh4cutN7QBEh70fv2lmbXG3cdo5nCpYeuFoQDZq5yBqBxaeBl
h4couFh2zhw8gFs3Z1HTZFO4eTiYRKgh3O5mKE5FQGnKH1CSw8uTUbQFI7BHnbtm4jF63rfw6v7OSAPzxsjPg6zdXOQdQGS6ZOm75TO4SdsPDkUGxl4iCAd7RzkHftL+n94bJhNwyESFA7i1uEyq4YAKBGOwdRK/F21hFYeHKPt7VIRTJYunPBSXcGIFKuncVNxFfWwx8+6X3tHEStxMJzBBae3GPhoYKzJFA3d+wDm4wv0E87ixsFiidPhISWaecgaqEUgBnaIeyGhSf35oNL01OBzR9918JUsMNw7RxuJSK+UOnlYWReSIjsbuHUadP5OnQEFp4ci61MpAD8VTsHecfSoTfMOFjS8xTtHG7nC/Q83hcYwLV5yAmqtQPYEQtPfrymHYC8Yf2xZ769/ZhRnH5eIMHSC04GfBu0cxA14xXtAHbEwpMf0wHUaYcgd9vZZeiSNcddPFo7h5eIBIuDJed8rJ2DqAkrpk6bSbL2TgAAH2NJREFUvlI7hB2x8ORBbGViH7iZKOXRgeIe69+Pf60PRMLaWbzGH4qOFl/nOdo5iBrxW+0AdsXCkz/8oqO8qA8U18wfc5cF8XXVzuJVobIrjgewRzsH0VG8qh3Arlh48ucP4IwOyjFLfPVzxz7wofEFB2hn8TLxlXb3F41dqp2D6Airp06bvkQ7hF2x8ORJbGViJ7gOAuXYuyfeMbc+VDZKOwcBgaIJEyFhvriQnfDOQhNYePKLs7UoZ1YMuXbG/rJjJ2nnoAwRkVDpFSUA6rWzEGXxdlYTWHjy63fI7FhL1C4be0+eu7XnyZO1c9An+QLHDPQFB76tnYMIwNqp06Yv1A5hZyw8eRRbmdgCbiZK7bS7/PgVHxx/5QiIiHYW+m/BkvPHAb712jnI83hHoRksPPnHL0Jqs0NFXTe9N/KWbhAp1s5CRycSKAqWnLdTOwd5Hm9nNYOFJ/9eAW9rURuk/EV754259yDEd4x2FmqaPzT4RPF15a0t0rIBwDztEHbHwpNnsZWJjwC8pZ2DnMVA0nPH3veB5Q8dr52FWiZUdkUUwC7tHORJr02dNp0/WDeDhacwntMOQM6yaNRtbyfDnbhthIOIr0PXQNH4Fdo5yJN+oR3ACVh4CuN3AHZrhyBn+GDQFTNrygdyRpYDBYrHT4QULdbOQZ6yaOq06e9qh3ACFp4CiK1M1AL4tXYOsr8tPcfN39hnykTtHNR2obIrysHNg6lwntEO4BQsPIXD21rUpJqyyKrEkM8NhQj/XzqYz999gC84eK52DvKEAwB+qR3CKfiNtUBiKxOLAPBSNx1VbajTxwtP/FZHiJRqZ6H2C5acMw7wr9XOQa738tRp0/dph3AKFp7Cel47ANlP2hc6OG/sfbsg/l7aWSg3RALhYMkFNdo5yPV4O6sVWHgK65fgvX1qwECseWPvXZoOFMW0s1Bu+UMDR4q/+2ztHORa70+dNp1r77QCC08BxVYmdgF4XTsH2cf7w78+q7ao68naOSg/QqWXDwOwQzsHuRKv7rQSC0/h8bYWAQDWDLhw1q4usSnaOSh/xFfcOVA0YZV2DnKdg+DaO63GwlN4fwfAjQY9blv3UYvW9zt7vHYOyr9A8ckTIMWLtHOQq/xm6rTpHCPWSiw8BRZbmbAA/Eg7B+nZV9JnzbKhXxoIkYB2FiqMUNmVXQHUaucg1/iZdgAnYuHR8Qwy6yeQxySDZTvePenOEETKtbNQ4fj8Xfv7QlEOMKVcWDZ12vR3tEM4EQuPgtjKxB4AVdo5qLDSvkDt3JPv32J8/mO1s1DhBTt86hTAv0Y7Bzker+60EQuPnv8HgLvbeoQBzILRdy1KBTrEtbOQDhF/MFh64QHw/z213S4AL2iHcCoWHiWxlYkPAPxZOwcVxtJhX555sEPPU7RzkC5/cMBw8ffg2jzUVv8zddr0/dohnIqFR9fT2gEo/9b1O/vtHd1Hcvo5AQBCpZfGAdmunYMc5wCAH2qHcDIWHkWxlYm3AC
zUzkH5s6PLsPc/HHDhGO0cZB/iK+4UKJ60WjsHOc7Ppk6bvks7hJOx8Oh7UjsA5ceBDj3WL4l/7ViIhLSzkL0EikafAil5VzsHOUYSvCPQbiw8+l4F8KF2CMqt+kDJ7vmj77Yg0kU7C9lTqOyKYwAc0s5BjvDS1GnTN2mHcDoWHmWxlYk0gO9p56DcscRXP/fk+9cbX2CAdhayL5+/Sz9/aOh87RxkexaAJ7RDuAELjz08D2CbdgjKjXdPunNefbB0pHYOsr9Ah7MnAAGO56GmvDp12nR+jeQAC48NxFYmDgF4XDsHtd+K6Odn7C/tO1E7BzmDiC8QLL2oDpmf4omO5lHtAG7BwmMfPwbAe7QOtqHPlHe29hg7WTsHOYs/2P8E8ffi2jx0NH+ZOm36Yu0QbsHCYxOxlYlaAA9r56C22dVp8PLVg64YBRHRzkLOEyq9ZAQgH2vnINt5RDuAm7Dw2MuzANZqh6DWOVTUbePiEd84BiJF2lnImcRXVB4oPpX/96mht6dOmz5LO4SbsPDYSGxloh7Ag9o5qOVS/qK988bcUwvxddfOQs4WKBo1DlK6QDsH2cZ3tAO4DQuP/fwCwErtENQ8S3ypuWPvX235Q4O0s5A7hMqu7IXMFgLkbX+fOm36G9oh3IaFx2ay6/I8oJ2Dmrdo5LfmJMPlJ2nnIPfw+Tv19YfiXIHZ2wyACu0QbsTCY0+vAHhfOwQ1btXxn/n/7d15lJzVfebx76+qulu9SQjEIiFAQUKoACGhpUFCEtBgvETYZmc4ntgQ2xTpHIxTZnBCgOAsODP22IxduENi4sSO7bJ9QmJXxjY505YASa0FBGipslgEIkiAQGiXutXdd/54S6YFWnqpqlv11vM5p456qXrfp7V0P7rve+9dvHPU72lGlhRcrOHyeRD7re8c4s1PkumM9lgsAhWeMhTPZR1wr+8ccnibT7loxevj5s/3nUPCySwSrW26uhetzVONDgD3+A4RVio8ZSqey/4CWO47hxxq+8jfy+XO/tS5mOnfjhRNpOa0cyx2qtbmqT6PJNOZl3yHCCt90y5vuo5bRvbXjd7yzAV/MgqzRt9ZJPxqmz55AdgW3zmkZHaimVlFpcJTxuK57CKC+3nEs95I7Z7Olnt3YJGxvrNIdTCra441tG7ynUNK5q+T6Yz2VCwiFZ7ylwT2+g5RzRzW19ly77q+aN0U31mkusTqpl1IZKQubYffRuAh3yHCToWnzMVz2dfQ8uJePTvtj5/sGnF8i+8cUp3qmm44HdjtO4cU1f9IpjNdvkOEnQpPZfgaoBvZPHjxzE8+8e7oKZf4ziHVy6Ijx0brpmuacng9mUxnfuY7RDVQ4akA8Vy2C/ii7xzV5s0TZz696bQr5vrOIRKrv2w+1Kz3nUMKzgF/4jtEtVDhqRD5aeq/9J2jWuxqOu3FdefcMgmzmO8sImYWqW2+NgL0+s4iBfVoMp0p2sraZvYNM7uz3/u/NrN/6Pf+181sUIXLzO40s4ZC5iwVFZ7K8gWg23eIsOuqad66asZdIzAb5TuLyEGR2LgpkdjpWpsnPN4A7iryOZYCcyEozcAY4Nx+n58LLBnkMe8EBlV4zCw6yHMUhQpPBYnnsi8A3/CdI8x6I7H9nRfe/5aLRMf7ziLyfjVNH58F9rrvHFIQdyTTmXeLfI4l5AsPQdFZC+wys9FmVgfEgdVmdpeZrTSz583sAQAzazSz/zCz58xsrZndaGZ3AOOA35jZb/LPu9LMlpnZM2b2UzNryn/8FTO7z8yeAq43s0Vm9rdmtsLMNphZyVerV+GpPH8F6BteEThwK2bds7o3Vn/usZ8tUnpmtY2xhiv177/y/TyZzhR9jTXn3Gagx8xOJyg+ywhW8J8DzAKeBy4FzgJagOnATDNbAHwE2Oycm+acOw/4lXPu/wCbgcucc5eZ2Rjgz4ErnHMzgFUcek/SfufcPOfcj/Pvx5xzLQSjRCXfJFuFp8LEc9ndwJd85wijNe
fd9sS+hpPm+M4hcjSxunNbLDKq03cOGbKdwB+V8HwHR3kOFp5l/d5fClyZf6wGngGmEBSgNcAV+VGZ+c65HYc59kXAOcASM3sW+DRwRr/Pp9/3/H/N//o0MGHYX9kgqfBUoHgu+2PgF75zhMnGMz7y1Ntjztf0c6kItc03TCD4wSmV50+T6UwpR+kO3sczleCSVifBCM/B+3cMeNA5Nz3/mOSc+65zbgMwk6D4PGhm9x3m2Ab8Z7/XnuOc+8N+n9/zvucfXGuoFyj5hBAVnsp1G1Ds679VYesJU5/dOGGhFhaUimGR5lOidTOe9Z1DBm0p8J0Sn3MJsBDY5pzrdc5tA44jKD3LgF8Dt/a79+ZUMzvJzMYBe51zPyBYC25G/ni7gOb8253AxWY2Kf/aBjObXKovbLBUeCpUPJfdAtzhO0el29NwyitrzrvtDMxqfWcRGYxY/SXzsNq1vnPIgHUDn02mM67E511DMDur830f2+Gce9s59zjwQ2CZma0BfkZQaKYCK/KXqu4huH8U4BHgl2b2G+fcVuAzwI/M7Pn8Ocp2Cx5zrtS/91JI2Snxfwc+7jtHJequady2ZM7f7HSR2ATfWUSGoq9ny4buXT86Ew+XB2TQHkimM3/hO0Q10whP5dOlrSHos2h3Z8v9r6nsSCWLxMZOjsQmaG2e8rce7YnonQpPhYvnsm+gS1uDtnLm3St6ahqn+c4hMlw1TVe1QOQ13znkiBzwuWQ6o0VjPVPhCYF4LvsD4Oe+c1SKdfHPLN7TdOo83zlECsGspqGm4cNv+s4hR/SdZDqz1HcIUeEJk9uAbb5DlLtN4y9b+uZJsxb4ziFSSNG6+CyLjF7mO4d8QI7ibx8hA6TCExK6tHVs2447e+2LE6+dgZn5ziJSaLXN108CDrc4nPixH7gxmc7s9R1EAio8IRLPZf8FeMx3jnK0t/7E156d9scnYzbCdxaRYrBI04nRutnP+84hv/OlZDqjP48yosITPp8FNvkOUU4OROt3rJh1TzcWOdF3FpFiitXPm4fV6Yesf/+WTGdSvkPIoVR4Qiaey24DbgQO+M5SDvos0rP8wvte7IvWTPSdRaTYzMxqm65rQP/+fXoNuNV3CPkgFZ4QiueyncCf+c5RDp6+ILmsu3bkTN85REolEjt5UqTmzCW+c1SpXuDmZDqjtdHKkApPeH2dKt9gNDf5psW7Rk6Y7zuHSKnVNC68CCKv+s5Rhb6STGe0EGSZUuEJqXgu6wj2OKnK+3k2j527fPPYeSo7UpXMYiNqGj/2ju8cVWYR7+03JWVIhSfEqvV+nu0jz8zmJt98Hmb6+y1VK1o7eYZFTtCCd6XxNvCpZDrT5zuIHJl+IIRc/n6eP/Wdo1T21R2/5ZkLvjgas0bfWUR8q22+bjLaa68UbkmmM6/7DiFHp8JTBeK57Nepgq0neqJ1u5e3/PlOLHKK7ywi5cAijWOiIy5a5ztHyD2UTGcyvkPIsanwVI/PAKG9idFhfctn37u+L1p3tu8sIuUkNmLOxdiI53znCKnFaOuIiqHCUyXiuey7wPUEy52HzuppdzzVNWJ0i+8cIuUmWJvn+mZAu3UX1svAtcl0pqrukaxkKjxVJJ7LriQY6XGeoxTUCxOvfmL76MnaEFTkCCKxE8+M1JylG5gLZydwVTKd0Uy4CqLCU2XiuWwaeMB3jkJ586SZq14bf/nFvnOIlLuaxo/OgchG3zlCoJdgU9D1voPI4KjwVKF4LvsA8GPfOYZrZ9NpL6yL3zIZs6jvLCLlzixWV9O4cLvvHCHwpWQ68yvfIWTwVHiq1y3Act8hhqqrduTWp2fcVY/ZSN9ZRCpFtHbSBRYdo20nhu7vk+nMN32HkKFR4alS8Vx2P/BJgo3uKkpvpGZfZ8t9b7lIdLzvLCKVprbpujige08GbxHQ5juEDJ0KTxWL57JvAFcBu31nGSgHbsXse57tjdWf6zuLSCWySMPxsRFzc75zVJiXgOs0I6uyqf
BUuXgu+xxwM1ARS6I/f17iiX31J87xnUOkksXqL7oYq1/tO0eF2IFmZIWCCo8Qz2V/AdztO8exbDzjY0+9M2bqJb5ziIRBbfMNownpulwF1AvclExnsr6DyPCp8AgA8Vz2a8AjvnMcydYTzn9244SPXeg7h0hYRKInTIjUnF2xExdK5E7NyAoPFR7p73bgJ75DvN/uxnEb15z3+QmY1fjOIhImNY0fmQPRl3znKFP3JtOZb/sOIYWjwiO/E89l+4BPAf/Xd5aDumsat62ceXcEs+N8ZxEJG7NobU3TVbsJ2errBfA/k+nMX/kOIYWlwiOHiOeyB4DrgCd8Z+mzaHdny/3/5SKxM3xnEQmraM2Z0yx68lO+c5SR7yTTmbK/p1EGT4VHPiCey+4jmK7+tM8cK2d9eWVPTeP5PjOIVIPapmumAlt95ygD30dr7YSWCo8cVjyX3Ql8GPCyX8za+C2L9zSO0x5ZIiVgkfrjYvXzN/jO4dljwC3JdEaX90JKhUeOKJ7LvgN8CCjphoObxl++9K2TZ2n6uUgJxUbMvhhr8Dqq69HjBNPPe30HkeJR4ZGjiueym4ErgM2lON87o+NrXpx49cxSnEtEDlXbfMOJwD7fOUrsSeDqZDrT7TuIFJcKjxxTPJd9GbiSIu+/s7f+pE3Pnf9HYzGrK+Z5ROTwItHjT4/UnrPCd44SWgUsTKYze30HkeJT4ZEBieey6yhi6TkQq9+xYvafHcAiY4pxfBEZmJqGD82F2Au+c5TAOuAjyXRmp+8gUhoqPDJg8Vz2GeAS4I1CHrfPIj2dLfe/1BepmVjI44rI4JlFa2qaPrGfcK/NkwU+pP2xqosKjwxKfqRnPrCpUMd8esaXlh2obZ5RqOOJyPBEa86YatGxT/rOUSSrgAXJdGaL7yBSWio8MmjxXPZFgtLz4nCPlZ188+JdzWfMH34qESmk2qarp4G95TtHgS0CWpPpzNu+g0jpqfDIkMRz2U0EpWfdUI/x+th5nVvGzlXZESlDFhkxKlZ/SZj22fo58NFkOrPLdxDxQ4VHhiyey75BcE/PM4N97bujJq3/7eSbzsdMfwdFylRsxIw5WNNK3zkK4PvAtcl0Zr/vIOKPftjIsOQXJ2wFlgz0NftGHL959fQvnIBZQ/GSiUgh1DZfPxao5Gnb3wI+nUxnenwHEb/MuTDfiC+lkp0SbwT+Hbj8aM/ridbtemruV7f0RWsnlyaZiAzXgT2PL+7tXluJq59/JZnO3O87hJQHjfBIQcRz2T3A7xOUnsNyWO/ylvtyKjsilSXWcMXFEPut7xyD4IA7VXakPxUeKZh4LtsFXEMwhPwBq6d/YUlX3XGzS5tKRIbLLBKrabq6B+jznWUAeoFbk+nMQ76DSHlR4ZGCiueyffFc9g7gi/T75vjCxGsXbz/urAX+konIcERrTjvXYqeW+9o8+4Hrk+nM93wHkfKjwiNFEc9lvwlcB+x74+TZq14bf9k835lEZHhqmz55AVi5Lti3mWBBwcd8B5HypMIjRRPPZR87EGu8ZP2UPzgOs6jvPCIyPGZ1I2MNra/6znEYy4FZyXQmDFPopUhUeKSozl+7aiUWaQVW+84iIsMXq5t2EZHmctpR/Z+BS7RVhByLpqVLSaQSHQ0E35iu9Z1FRIanr3fH5u6d3x0JNPmMAdydTGe+5jGDVBCN8EhJtLW37gWuB/7SdxYRGZ5IdNS4aN20pz1G2AEsVNmRwdAIj5RcKtFxI/CPQL3vLCIyNM719XZtT22AA/ESn3oD8PFkOlNJ6wJJGdAIj5RcW3trGlhAMKtCRCqQWSRa23wNBOvelMqvgQtVdmQoVHjEi7b21lXADOBXvrOIyNBEYqfGI7HTnirR6f438PvJdGZ7ic4nIaNLWuJVKtFhBIsUPgjUeo4jIoPkXPfuru2pneDGFekUe4Dbk+nM94t0fKkSKjxSFlKJjhnAjwDtsyVSYXq61q7o2ft4SxEOvQq4OZnOvFCEY0
uV0SUtKQtt7a3PEFzietR3FhEZnFjdeS0WGdVZwEM64H8Bc1V2pFA0wiNlJz+L6++AUb6ziMjAuL5db3Tt+PsGYOQwD7UZ+INkOvP/ChBL5Hc0wiNlJz+Lazqw1HcWERkYizSfEq2bMdwV1X8OTFPZkWLQCI+UrVSiIwrcD9yDyrlI2XPO9XVtT2Wh+9xBvnQ/kEymMw8XI5cIqPBIBUglOhYAPwBO851FRI6ur2fLb7t3/WgiEBvgS9YA/y2ZzqwrYiwR/a9Zyl9be+sTwDTgX31nEZGji8TGnh2JnbFkgE//NtCisiOloBEeqSipRMfngW+ibSlEypZzB/Z2bU9tg77xR3jKZuDzyXTmP0qZS6qbRnikorS1tz5CcEOzbmoUKVNmNQ01DR9+4zCfcsAjwDkqO1JqGuGRipVKdNxEsNz8WN9ZROSDunY8usz1bZ+Tf3cD8LlkOvOEz0xSvTTCIxWrrb31x8AU4CFKu4GhiAxAbfMNE4F3gL8hmG6usiPeaIRHQiGV6JgGfAeYc6znikjJLO098PJtd3z3s2t9BxFR4ZHQyG9E+ofAV4ETPMcRqWZvA3cD/9jW3qofMlIWVHgkdFKJjhOAvwVuBcxzHJFq4oB/AL7c1t66zXcYkf5UeCS0UomOOcDDBLO6RKS4VgO3t7W3LvcdRORwdNOyhFZbe+syYBZwJ7DTcxyRsHoXuAOYrbIj5UwjPFIVUomOsQRT2G/ynUUkJHYTLAL6tbb21h2+w4gciwqPVJVUomMu8BXgct9ZRCrUfoIZkQ+2tbdu9R1GZKBUeKQqpRId8wmKz6Weo4hUih7gUeArbe2tr/sOIzJYKjxS1VKJjkuBB4AFnqOIlKs+4IfAX7S1t77kO4zIUKnwiACpRMflBMXnYt9ZRMrIY8C9be2t2s1cKp4Kj0g/qUTHlQTF5yLfWUQ8ehy4p629dZXvICKFosIjchipRMdHCYrPbN9ZREroKYKioz2vJHRUeESOIpXoWEhQfGb4ziJSRCsI7tH5pe8gIsWiwiMyAKlExyeA+1DxkfDoBn4CfKutvXWF7zAixabCIzII+e0qbgduAOo8xxEZiteBduCRtvbWt3yHESkVFR6RIchvUHoLkAAmeo4jMhBPAt8CHmtrb+3xHUak1FR4RIYhlegw4EMEoz5XAVG/iUQOsQ/4F+Dbbe2tz/kOI+KTCo9IgaQSHeOBz+UfYz3Hkeq2EXgY+G5be+u7vsOIlAMVHpECSyU6YsAnCEZ9WgHzm0iqyH8C3wYybe2tfb7DiJQTFR6RIkolOiYT3OfzGWC03zQSUq8CPyUYzcn5DiNSrlR4REogleioB64BrgM+DNT7TSQV7mDJ+ammlIsMjAqPSImlEh0NwEcJCtBCYKTfRFIhXgV+BvxEJUdk8FR4RDxKJTpqgSsIys8ngDF+E0mZ2cR7IznLfYcRqWQqPCJlIpXoiALzCcrP1cB4v4nEk03kR3KAFW3trfomLVIAKjwiZSi/vk8LQfm5BpjkN5EU2SvAYwQlZ7lKjkjhqfCIVIBUomMqwajPZcCF6KbnSvcKsAhYDCxqa299xWcYkWqgwiNSYVKJjhpgJjCP4BLYxcAJXkPJsbxMvtwQFJxNfuOIVB8VHpEKl7/8FScoQAdL0ASfmYSXOHQE5zW/cUREhUckhFKJjlN5r/zMA6YCEa+hwu0FDh3Bed1vHBF5PxUekSqQSnSMAuYCFxGMBk0BzgJG+MxVgV4H1gHr+/26vq29dbvXVCJyTCo8IlUqleiIAKcTlJ+z+z2mAOM8RisH/8WhpWYdQbHZ4TWViAyZCo+IfEAq0dEMTOaDZWgy4Zgh5oB3gbcIVjA+WGoOFpudHrOJSBGo8IjIgOVvkB4PnAKcdITHyQSzxkYBTZRut/i9BAVmII+tbe2tPSXKJSJlQIVHRIomf9lsJHAcQQHq/4gCfQSjLX39HoN5fzf5EtPW3rqnVF+XiFQeFR4REREJPU1TFRERkdBT4R
EREZHQU+ERERGR0FPhERERkdBT4REREZHQU+ERERGR0FPhERERkdBT4REREZHQU+ERERGR0FPhERERkdBT4REREZHQU+ERERGR0FPhERERkdBT4REREZHQU+ERERGR0FPhESlzZnaPma0zs+fN7Fkzu3AIx7jUzOb2e/97ZnZdYZOKiJSvmO8AInJkZjYHWAjMcM51mdkYoHYIh7oU2A0sLUAmA8w51zfcY4mIlIpGeETK21jgbedcF4Bz7m3n3GYzu9zMVpvZGjN71MzqAMzslXwpwsxmmdkiM5sAJIAv5keI5uePvcDMlprZy/1He8zsLjNbmR9ReiD/sQlmljWzh4FngNPMbLeZ/bWZPWdmnWZ2cql+U0REBkuFR6S8PU5QLjaY2cNmdomZjQC+B9zonJtKMFJ7+5EO4Jx7BWgHvuGcm+6cezL/qbHAPIIRpK8CmNmVwFlACzAdmGlmC/LPPxv4Z+fcBc65V4FGoNM5Nw14AvhcAb9uEZGCUuERKWPOud3ATODzwFYgDdwGbHTObcg/7Z+ABYc/wlH9m3Ouzzm3Hjg4OnNl/rGaYCRnCkEBAnjVOdfZ7/XdQCb/9tPAhCFkEBEpCd3DI1LmnHO9wCJgkZmtAT59lKf38N5/ZEYc49Bd/d62fr8+6Jz7u/5PzF8W2/O+1x9wzrn8273o+4mIlDGN8IiUMTM728zO6veh6cCbwAQzm5T/2H8HFufffoVgRAjg2n6v2wU0D+CUvwZuNbOm/PlPNbOThhhfRKRsqPCIlLcm4J/MbL2ZPQ+cA3wZuAX4aX7Ep4/gHh2AB4CHzOxJglGXg34BXP2+m5Y/wDn3OPBDYFn+2D9jYEVJRKSs2Xsj0iIiIiLhpBEeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJPRUeERERCT0VHhEREQk9FR4REREJvf8PqvEF1Lsv4bUAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "states_agg_df.total_area.plot.pie(figsize=(10,10))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Vx3j5SmmqpIf" + }, + "source": [ + "## Create a subset of the dataframe such that it shows mean power generated on each of the days for all the sectors." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a new col which is sum of all actual power generated" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateregionthermal_generation_actualthermal_generation_estimatednuclear_generation_actualnuclear_generation_estimatedhydro_generation_actualhydro_generation_estimatedall_actual
02017-09-01Northern624.23484.2130.3635.57273.27320.81927.86
12017-09-01Western1106.891024.3325.173.8172.0021.531204.06
22017-09-01Southern576.66578.5562.7349.80111.5764.78750.96
32017-09-01Eastern441.02429.390.000.0085.9469.36526.96
42017-09-01NorthEastern29.1115.910.000.0024.6421.2153.75
\n", + "
" + ], + "text/plain": [ + " date region thermal_generation_actual \\\n", + "0 2017-09-01 Northern 624.23 \n", + "1 2017-09-01 Western 1106.89 \n", + "2 2017-09-01 Southern 576.66 \n", + "3 2017-09-01 Eastern 441.02 \n", + "4 2017-09-01 NorthEastern 29.11 \n", + "\n", + " thermal_generation_estimated nuclear_generation_actual \\\n", + "0 484.21 30.36 \n", + "1 1024.33 25.17 \n", + "2 578.55 62.73 \n", + "3 429.39 0.00 \n", + "4 15.91 0.00 \n", + "\n", + " nuclear_generation_estimated hydro_generation_actual \\\n", + "0 35.57 273.27 \n", + "1 3.81 72.00 \n", + "2 49.80 111.57 \n", + "3 0.00 85.94 \n", + "4 0.00 24.64 \n", + "\n", + " hydro_generation_estimated all_actual \n", + "0 320.81 927.86 \n", + "1 21.53 1204.06 \n", + "2 64.78 750.96 \n", + "3 69.36 526.96 \n", + "4 21.21 53.75 " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_df['all_actual'] = power_df.loc[:,['thermal_generation_actual','nuclear_generation_actual','hydro_generation_actual']].sum(axis=1)\n", + "power_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "get the average daily production across all regions " + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "oedxXWs1qtmi" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
thermal_generation_actualthermal_generation_estimatednuclear_generation_actualnuclear_generation_estimatedhydro_generation_actualhydro_generation_estimatedall_actual
date
2017-09-01555.582506.47823.65217.836113.48499.538692.718
2017-09-02555.582512.67423.65218.456113.48499.128692.718
2017-09-03555.554506.64623.65218.514113.48494.610692.690
2017-09-04555.554542.85623.65218.524113.484100.072692.690
2017-09-05558.170555.93023.65218.542113.48494.032695.306
\n", + "
" + ], + "text/plain": [ + " thermal_generation_actual thermal_generation_estimated \\\n", + "date \n", + "2017-09-01 555.582 506.478 \n", + "2017-09-02 555.582 512.674 \n", + "2017-09-03 555.554 506.646 \n", + "2017-09-04 555.554 542.856 \n", + "2017-09-05 558.170 555.930 \n", + "\n", + " nuclear_generation_actual nuclear_generation_estimated \\\n", + "date \n", + "2017-09-01 23.652 17.836 \n", + "2017-09-02 23.652 18.456 \n", + "2017-09-03 23.652 18.514 \n", + "2017-09-04 23.652 18.524 \n", + "2017-09-05 23.652 18.542 \n", + "\n", + " hydro_generation_actual hydro_generation_estimated all_actual \n", + "date \n", + "2017-09-01 113.484 99.538 692.718 \n", + "2017-09-02 113.484 99.128 692.718 \n", + "2017-09-03 113.484 94.610 692.690 \n", + "2017-09-04 113.484 100.072 692.690 \n", + "2017-09-05 113.484 94.032 695.306 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_df = power_df.groupby('date').mean()\n", + "mean_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "wu-Nh9-IqzDV" + }, + "source": [ + "## Plotting a graph of mean of all the types of power gernerations in all of India, with total power generation" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ True, False, True, False, True, False, True])" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "actual_col_list = mean_df.columns.str.endswith('actual')\n", + "actual_col_list" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
thermal_generation_actualnuclear_generation_actualhydro_generation_actualall_actual
date
2017-09-01555.58223.652113.484692.718
2017-09-02555.58223.652113.484692.718
2017-09-03555.55423.652113.484692.690
2017-09-04555.55423.652113.484692.690
2017-09-05558.17023.652113.484695.306
...............
2020-07-28592.32626.216105.154723.696
2020-07-29592.32626.216105.154723.696
2020-07-30594.87226.216105.154726.242
2020-07-31596.80026.216105.154728.170
2020-08-01559.48226.216122.794708.492
\n", + "

989 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " thermal_generation_actual nuclear_generation_actual \\\n", + "date \n", + "2017-09-01 555.582 23.652 \n", + "2017-09-02 555.582 23.652 \n", + "2017-09-03 555.554 23.652 \n", + "2017-09-04 555.554 23.652 \n", + "2017-09-05 558.170 23.652 \n", + "... ... ... \n", + "2020-07-28 592.326 26.216 \n", + "2020-07-29 592.326 26.216 \n", + "2020-07-30 594.872 26.216 \n", + "2020-07-31 596.800 26.216 \n", + "2020-08-01 559.482 26.216 \n", + "\n", + " hydro_generation_actual all_actual \n", + "date \n", + "2017-09-01 113.484 692.718 \n", + "2017-09-02 113.484 692.718 \n", + "2017-09-03 113.484 692.690 \n", + "2017-09-04 113.484 692.690 \n", + "2017-09-05 113.484 695.306 \n", + "... ... ... \n", + "2020-07-28 105.154 723.696 \n", + "2020-07-29 105.154 723.696 \n", + "2020-07-30 105.154 726.242 \n", + "2020-07-31 105.154 728.170 \n", + "2020-08-01 122.794 708.492 \n", + "\n", + "[989 rows x 4 columns]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_df.loc[:, actual_col_list]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlYAAAI6CAYAAADlkwY+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdd3hUxRrA4d9sS9n0CoHQi4CgF1ARQewdLIDl2jsKCgooigU72K+iAnbABnawoYKIAioqICAl1PTey9a5f5yzyyakAZtsEuZ9njzsnjpns2S//eY7M0JKiaIoiqIoinL4DIFugKIoiqIoSluhAitFURRFURQ/UYGVoiiKoiiKn6jASlEURVEUxU9UYKUoiqIoiuInKrBSFEVRFEXxExVYKYqi1EEI0UUIIYUQpkC3paURQpQJIboFuh2K0tKowEpRDoIQ4ichRKEQIijQbVEaJoSYIYRYGOh2tEVSyjAp5a5At0NRWhoVWClKIwkhugDDAQmMCmhjmklLztS05LYdjEBcR1t57RSlJVKBlaI03jXAWuAd4FrPQiHEECFElhDC6LPsYiHERv1xiBDiXT3T9a8Q4h4hRFpdJ9G7nu4UQuwSQuQJIZ4RQhj0dQYhxANCiL1CiBwhxHwhRKS+7l0hxGT9cQf9OLfrz3sIIQqEEEJ/foEQYr0QokgIsVoIMcDn/HuEEPfq7S+v7UNYCHGWEGKbEKJYCPGqEGKlEOImn/U36NdaKIT4TgjRucb1jRNC7NDXv+JpVyP3HS+E2AHs0Jf9TwiRKoQoEUL8KYQYri8/B7gfuEzvttqgL48UQrwphMgUQqQLIR73/O6EEEYhxLP6674LOL+u35O+/TQhxE4hRKkQYosQ4uIa62/Wr8WzfmBdr7EQoo+eES0SQmwWQozyOc55+v6lepun6MvjhBBL9X0KhBCrPO+VWtpa22t3lBDie33fbUKIS322jxVCLNFf1z/01+mXGsfr4fOazhdC5OrvzQd83rPXCSF+0V/XQiHEbiHEuT7HuU5o7/VSfd2V9b3mitLiSSnVj/pRP434AVKA24FBgANI9Fm3EzjT5/liYJr+eCawEogGOgIbgbR6ziOBFUAM0AnYDtykr7tBb0c3IAz4FFjgs26J/vi/eps+8ln3hf54IJADnAAY0YLEPUCQvn4PsB5IBkJqaV8cUAJcApiAifrr4WnjRXob++jrHwBW17i+pUCUfn25wDkHse/3+msToi+7CojVt58MZAHB+roZwMIa7f8cmAtYgQTgd+BWfd04YKt+7TH670ECpjp+V2OBJLQvqZcB5UB7n3XpwHGAAHoAnWt7jQGzft33AxbgNKAU6K1vnwkM1x9HAwP1x08Bc/T9zWgZVVHP+8r72unXnwpcr792A4E8oJ++/Yf6TyjQV9/2lxrH66E/ng98AYQDXdDeszfq667T3x83o73fbgMy9NfEivZe8lxne8/51Y/6aa0/AW+A+lE/reEHGKZ/OMTpz7cCd/msfxx4S38crn/Adtaf7wLO9tn2JhoOrM7xeX478KP++Efgdp91vfV2mYDuQJH+IT8HuNVzHuBd4G798WvAYzXOuQ0YoT/eA9xQT/uuAdb4PBf6h64nsPrG86GqPzcAFT6vhwSG+axfxP4gtDH7ntbA76oQOEZ/PAOfwApIBGz4BIzAFcAK/fFyYJzPurOoJ7Cq5dzrgQv1x98BE+vYrtprjBYQZQEGn2UfADP0x/v032dEjeM8ihbQ9GhE26q9dmiB4Koa28wFHkYLgBzoAY/Pe/yAwErf1gb09Vl3K/CT/vg6IMVnXai+bzu0wKoIGE0tQbz6UT+t8Ud1BSpK41wLLJNS5unP38enO1B/fonQitovAf6SUu7V1yWhBR4evo/r4rvNXv0YnmPtrbHOhJY92wmUAceifVAvBTKEEL2BEWhZM4DOwGS9+6hICFGEljlJ8jlufW2sdj1SSgn4dm12Bv7nc+w
CtOCrg882WT6PK9Cyb43dt1rbhBCT9e62Yn2fSLSsWm06o2V2Mn3OMRctc3XAtVH9tT6AEOIasb9LtQg42ufcyWhZw7r4nicJSJVSumuc23Pdo4HzgL16t+uJ+vJn0DJdy/TutGn1tbfGOTsDJ9R4H1yJFvDEo72vGvO+jUPLstV8X9b6+5ZSVugPw6SU5WgB3ji038lXQoijGrgGRWnRVAGjojRACBECXAoYhRCeD4ggIEoIcYyUcoOUcosQYi9wLlo33Ps+h8hE6wLcoj9PbsRpk4HN+uNOaF0n6P929tmuE+AEsvXnK4ExgEVKmS6EWImWYYpGy6aA9gH5hJTyiXrOL+tZ57keAPT6qI4+6z3Hf6+eY9SlMft626bXU90LnA5sllK6hRCFaMFYtW19jm9Dyzw6azl2JtV/P53qaoRe+/W6fu41UkqXEGK9z7lT0bKIDV4H2u81WQhh8AmuPN3ASCn/AC4UQpiBCWhZvmQpZSla9+dkIUQ/YIUQ4g8p5Y+NOGcqsFJKeWYt12ZEe1919LSBut+3eWjZrc7sf493QusGbZCU8jvgO/3/2eNor+nwxuyrKC2RylgpSsMuAlxodSbH6j99gFVoQYvH+8CdwMloNVYei4D7hBDRQogOaB+MDZmqb5+MVsP0kb78A+AuIURXIUQY8CRaHZUnSFipH/9n/flPwB1oXTgufdnrwDghxAlCYxVCnC+ECG/MiwF8BfQXQlwktML28WhZDo85+vX2A29h89hGHvtg9w1HCwByAZMQ4iEgwmd9NtDFU0gtpcwElgHPCSEihHYzQHchxAh9+0XAnUKIjkKIaKC+DJAVLVDJ1dt6PVrGyuMNYIoQYpD+OvcQPoX4NfyG1n18jxDCLIQ4BRgJfCiEsAghrhRCREopHWg1SS79nBfoxxU+y121n+IAS4FeQoir9XOahRDHCSH66O+VT4EZQohQPYt0TW0H0bddBDwhhAjXr/FuoMFhLoQQiUKIUUIIK1rAW3YQ7VeUFkkFVorSsGuBt6WU+6SUWZ4fYDZwpdh/19wHwCnAcp8uQ9DqYNKA3cAPwMdoHyL1+QL4Ey3L9BXwpr78LWABWuC0G6hCC5w8VqIFG57A6he0mhbPc6SU69AKiWej1SOloNXBNIp+bWOBp4F8tIBzneeapJSfAbPQgoISYBNaJq8xxz7Yfb9Dq8vajtb9VEX1LitPgJsvhPhLf3wNWtfVFrTr/xitaBq0oPM7YAPwF1pwUVdbtwDPAWvQArj+wK8+6xcDT6AF3KVoRfMxdRzLjjaEx7loGaBXgWuklFv1Ta4G9uivyTi0gn2AnmjvqTK9Ha9KKX+qq801zlmKVkN2OVrGLAvttfeM0TYBrVs1C+099wF1v2/vQAsMd6G9595He682xICWcctA6/YdgVZTqCitltDKIxRFaS5CiNuAy6WUI+pYL4GeUsqU5m3ZodGzQWnAlVLKFYFuj9I0hBCzgHZSymsb3FhRjmAqY6UoTUwI0V4IcZLe7dQb7Rv6Z4Fu1+EQQpwthIjSi/XvR6srWhvgZil+JLQxrgbo3ZjHAzfSyt+3itIcVPG6ojQ9C9qdZ13Rbi3/EK2rpzU7Ea27x9OldpGUsjKwTVL8LByt+y8Jbdyz59C6qBVFqYfqClQURVEURfET1RWoKIqiKIriJyqwUhRFURRF8ZMWUWMVFxcnu3TpEuhmKIqiKIqiNOjPP//Mk1LG17auRQRWXbp0Yd26dYFuhqIoiqIoSoP0mTZqpboCFUVRFEVR/EQFVoqiKIqiKH6iAitFURRFURQ/UYGVoiiKoiiKn6jASlEURVEUxU9UYKUoiqIoiuInKrBSFEVRFEXxExVYKYqiKIqi+IkKrBRFURRFUfxEBVaKoiiKoih+ogIrRVEURVEUP1GBlaIoiqIoip+owEpRFEVRFMVPVGClKIqiKIriJyqwUhRFURRF8ZNGBVZCiLuEEJuFEJuEEB8IIYKFEF2FEL8JIXYIIT4
SQlj0bYP05yn6+i5NeQGKoiiKoigtRYOBlRCiA3AnMFhKeTRgBC4HZgEvSCl7AoXAjfouNwKFUsoewAv6doqiKIqiKG1eY7sCTUCIEMIEhAKZwGnAx/r6d4GL9McX6s/R158uhBD+aa6iKIqiKErL1WBgJaVMB54F9qEFVMXAn0CRlNKpb5YGdNAfdwBS9X2d+vaxNY8rhLhFCLFOCLEuNzf3cK9DURRFURQl4BrTFRiNloXqCiQBVuDcWjaVnl3qWbd/gZTzpJSDpZSD4+PjG99iRVEURVFaPHd5OY7MTCo3biRzxgzy33o70E1qFqZGbHMGsFtKmQsghPgUGApECSFMelaqI5Chb58GJANpetdhJFDg95YriqIoitIiVfz1F/tuuhlZUVFtedippxDUtWuAWtU8GlNjtQ8YIoQI1WulTge2ACuAMfo21wJf6I+/1J+jr18upTwgY6UoiqIoSttj37ePtPETMMfHEzdhAubOnYifNElbt3tPYBvXDBpTY/UbWhH6X8A/+j7zgHuBu4UQKWg1VG/qu7wJxOrL7wamNUG7FUVRFEVpYVzFxaTeOg7cbpLnziF+wnh6fPcdkReOAsDZQE21s7CQjGn34S4vb47mNonGdAUipXwYeLjG4l3A8bVsWwWMPfymKYqiKIrSWki7nbQ7J2JPS6PzW29i6dLFu84Uq93D5gmspN1O1ZYtmOLjMXfo4N0u55lnKf78c0KPP56oSy5u1vb7S6MCK0VRFEVRlLpIKcmc8QgVv/1G0tOzCD3uuGrrhcWCMToaZ04OALkvzyb/9dcxhIfT67e1CIPWgebMzgbAGBPdvBfgRyqwUhRFURTlsOTPe53iTz8l7vbbiRw1qtZtzB07UvTJJ4QceyxFixYB4C4txVVUhCkmBtif0TKEhDZPw5uACqwURVEURTlkJd98Q+4LLxBx/vnE3TGhzu3azXiYPWPGknn//QBYTzqJ8l9/Je+VVzElJuLMzcW2fbu2sdvVHE1vEiqwUhRFURTlkFSuX0/GvdMIGTiQ9k8+QX0TrYT060eH554l/e7JAFiHD6P8118pfO+9A7aVThVYKYqiKI0knU4qfv8dY0wMlk6dMIS23m4P5chlT0sndfwETImJdJz9MoagoAb3iTjvPJwFhRR99CFhw4aRo08n3OPnleB248zOZs9ll6uMlaIoitI4FevWkXHvNBzp6QAYIiLosew7jFFRAW6ZojSeq6SE1HG3Ih0OkufO8dZINUbMVVcSc9WVSLsdg9VK/MSJmBMSAHDm5QMgXe4maXdzUIGVoihKM8q4fzqO9HRirrsOY2wMuc89z47hJ9Plk48J7tUr0M1TlAZJh4P0SZOw79lLpzfeIKhbt0M6jrBY6P3nuurLjNrdgdLlrG2XVqExI68riqIofuDMz8exbx9xd95B4rR7ibv5ZtrPfAqEoOCddwPdPEVpkJSSrEcfo3z1Gto/8gjWISf49wQGo/avylgpiqIoDdl3/Q0AhA4c5F0WddFFVKxZQ/Fnn+HMySH0hOMxJyQQfs45GIKCsO3ciSk2VnUVKi1CwVtvUbR4MbG33ELU6Ev8fnxPxkrVWCmKoih1cpWWsu/Gm7Bt307EeecSenz1wRPj756Mu6KSir/+ovyXX7SF904DsxkcDiJHX0LSE08EoOWKsl/JsmXkPPsc4eecQ/ykiU1zEqOWsVI1VoqiKEqd7Lt3U7VxI0E9e9J+5kzvKNMe5sQEOr78EtLhwJmTQ/FXX5P7/PME9+lD1caNOHPqn19NUZpa5T//kHHPvYQMGEDSzKcOeA/7i/AGVq23xkoFVoqiKE3MXVUFQOL06Rgsljq3E2Yz5g4diL35JsJGjCCoZw/2jBnbZB9iitIYjowMUm+/HVNsLB1ffQVDcHDTnawN1Fip/62KoihNTNpsAIiguoMqX0IIgnv3QhgMGKxW3BUVTdm8FqFy02ZsO3cGuhlKDa6yMlJvHYesrNKGVdAnU24qwqR
nrFSNlaIoilIXT2B1KN/0DaGh3vnT2ip7aip7xozBGBVFr7VrAt0cRSedTtLvuhvbrl0kz5tLUI8eTX9ST3bW1XoDK5WxUhRFaWLuKk/G6hACK6sVd3m5v5vUYkgpSbuziQqhlUMmpSTriScoX7WKdg8/RNhJJzXLeffXWKnASlEURamDtGk1Vobghqf8qMlgteKqaLuBVel3y7D9+y8Ali5dAtsYxatw/nyKPviQmBtvIPrSS5vvxN6MVeutsVJdgYqiKE3MU7wuGjGXWk0GqxVZ3vZqrNw2G/lvvkneSy8T1LcPpqgoXEXFgW6WApQuX072zFmEn3kmCZMnN+u5hUkLS1TGSlEURamTtNmBQ+wKDA3FXVFByXfLcBYW+rtpAZMzaxZ5L70MQMKkSRisYUiHPcCtUio3byZ98hSCjz6apKdnNfsdqd7zteLidRVYKYqiNLHD6Qr0SJ84kT2jxyCdrXd8H1/lq9dgTk6m98YNhJ18MsJiwW1XgVUgObKySLvtdozRUSS/+gqGkJDmb4QaIFRRFEVpiLvKBiaTt5vjYISffRb2tFTM7dqTP28eea/NwdKlC5EjL2iCljYPZ2Eh9j17SJgy2Tuul7BYvJk9pfm5yspJHXcb7vJyOr//Pqb4+IC0w1O83pozViqwUhRFaSR3VRWO1FSCevY8qP1kVVW9A4PWJ7h3bzo8/TTOggLy336bvFdeAcA69MQmH1Ooqdj37AHA4nP7vgiyIFXGKiCky0XG5MnYduwgec5rBPfuFbjGeDJWztYbWKmuQEVRlEYq/OBDdo0cRfo99+DIzmlwe08BrttuQxzmaNWmmBi6ffoJ7WY8DMCuiy5i73XX48hpuB0tjX3vXgAsnTp7lwmLCqwCJXvmLMpWrqTdA9MJGz48oG1pCzVWKmOlKIrSSK6CAgBKvlyCq7CITq/Po3jpVxR/+gnuyioSH5hOSL9+FCx8D0dqKkWLF4MQuMvLMcbEHPb5g3r2JKhnTwzWMIoWLaJi7VqqNm3CfNpph33s5mTfswcMBiwdO3iXGVRgFRAFC9+jcMECYq69lugrrgh0czQmk6qxUhRFOSIIASYT8XfcQe4LL5D78mxv15wxKoo9o8dg7tABR3r6Abt6gjJ/iBx5ASHHHsPOM89qdUMUuMrKKVvxE8F9+iB8ukc9GSspJUKIALbwyFH6009kP/kkYaedRsI9UwPdHC9hMEArnoRZdQUqiqI0knQ6ESYTUaMvwRAR4Q2q2s2YQdcvPif66qu9QVX3Zd9x1JbN9Fr3B0mzZpI0a6Zf22KMjATAVdJyAivpcJA2cRKVmzfXur7o88/ZPngwtm3biDj//GrrvEGWw9HUzVSAqq1bybh7MkFH9abDM0/vLxpvCYxGlbFSFEU5EkinA2EyYYqLo9fqX3GXl1O26hcizjoTYbHQbvr9JEyZjCM9HUunTgAYw8KIvPBCv7fFEBYGQuAqbjmBlS0lhdLvvsO+ayfdliypts6+bx+Z0+4DIOm5Z4k455xq64VFG4rCbbdjrKPQ35mbiyMjg6otWwg77XRMCfEqu3UIHNk5pI67DUN4OMmvvYbBag10k6oRBoOqsVIURTki6Bkr0EaINkZGEnlB9cyLISiIoG7dmrwpwmDAGBGBu7gEZ0EB0umkbOVKwk4+GXNiYpOfvyYppbcoHbP5gPXZT2kZO0uXLkTWyFbB/oyVb52VMz+fshUrKP78CyrWrau+wyOPEty/P6HHHYfBGkrUmDEBue7Wxl1RQdptt+EqKaHLewtb5mumaqwURVGODNLhBHPL+bNpiIrEmZvDjmHDwa19EFm6dqXb0iXN2rXjKisn7Y4JVKxZC4DwCawq/9lE/htvULZiBQDtn3qy1mMIi7aPb2CV++L/tBsAAAwGYm+9BVNUFIbwCEq+/pryX36hatMmkBJhthB3y81NcXlthnS5SJ96D1Vbt9Lx1VcI7tMn0E2qlTAYkK24xqrl/IVQFEVp4bQaqwO
zMYFijIik9PsfvM/NycnYd+8m+6mZSFsVwf2OJvryy5q8HYULF3iDKgBhMGJPSyP11nHYd+4EIHTIEDrOno0xrPZuJ0/GypGeTu7s2TjS0qlYuxbr8OG0f2QG0u3G0rGjd/uwU0ZQ8tXXRF58EdsHH4e02ZrwCtuGnGeepezHH0mcPp3wU04JdHPqZjSqSZgVRVGOBNKnK7AliL3xRir++hPriScSfNRROAsK2DN6DIULFwJgXPFTswRWlZs2YQgLo93DD5MxdSqVf/9N/rzXse/ciSEigs7vvkNQr171ZtE8A6juvfIqQAsSMRiIHHkB5qSkA7Y3xcQQc7W2LUZjm5nqp6kUfvghBe+8Q/RVV+1/3VooYTQiVY2VoihK2+cpXm8pIs45m4hzzvY+94yVFTJ4EEE9e1L67XdN3ga3zUbVxn8IO3k4kSMvwFVSTPZjj1O0aBGhxx1H5wXzG3Ucc3IymM1YkpOxDhlC4n3TwGhs1CTAwmhs1bfnN7WyVb+Q9djjWEecTOK0ewPdnIYZDdCKR15vOX8hFEVRWroWlrGqyRAURPcfvseckED2M882SxYn+/HHcebkEDFqFAAxV15JyTffULnuT8LPPaeBvfcL6d+fozZuOKS7/ITJpNW/KQeo2r6d9EmTCOrZkw7PPd+i378ewqAyVoqiKEeElla8XhtPHZIwmZoksCqYPx/pdBF7w/U40tMp+vgTYq69tlrNTtJTT1Hxxzoizj/voI59yEMnmM2qK7AWztxcUseNwxAaSvKc1+qsb2txjAZVY6UoinIkaGnF6/Xxd2DlyMmh/JdfyX7yKQAiR40k5fQzAIi55upq21qSk7EkJ/vt3A1pqiCyJXPbbFRu2IAlORlz+/YHrq+sJPX28bgKi+i8YAHmdu0C0MpDI4wm7zybB8NZUIBt+w6sQ05oglY1ngqsFEVRGkm6WnZXoC9hNoHD4bcpYnKefoaSpUu9zz3DIIQMHoS5Q4e6dmsWwmhs1bfnHyxHejrpk6dQuX495uRkeny/rNp66XaTce80qjZtouPslwk5ul+AWnpohNEALhduux1hNjf6/bvz7HNwl5Zy1L9bAjpwrJrSRlHaCGd+PqnjJ7Br5EgyH54R6Oa0TY7WE1jhaechfPOvybZ7d7WgCiD3fy9hjIwkec7cwz7+4RImExwBNVbFS5aya+QoUk4/g8oNGzDGxuJITcWZm1ttu9znn6d02TIS7rmH8NNPD1BrD52zsIjS779n24BjyGrk3zJXSQnu0lKAgA+9oQIrRWkj8ufNo+zHH7HtSKH0++8D3Zw2qaUNt1AfT5floXSp1JR+50QAzElJxE2Y4F3e7uGHWkbdjvnI6ArMnjkTR0YGEaNG0mXxYjq+9D8AKjds8G5TuGgR+W+8SdTllxFz3bWBauphibv5JkKHDAGgaNEiHFlZgHZtGfdPB7RBaXOef4HUW8ex97rr2X78/u4/d2Vl8zfaR+v4C6EoSoMqN2wkZNAggrp1pWzlz4FuTpsknU4wtaDJauvhCQClwwlBQYd1LE/QEnvbOKJGj8ZdWooxKpKI8w6uOL2pCFPbL16XUuIqLCT2lptJmDQJ0OZVNERGkjbhDjq98w64XWQ9+hjWYcNo98ADrXYexZhrryXm2mux7dzJrvMvoPT7Hwg/8wyyHnoYgMRp91L6/Q/kz5tXbT9Lj+7YU3biLq+A6OhANB1QGStFaROkw0HV1q0E9+uLMJuRDkegm9QmtbbidQCch/deqNywAfvu3ViHDSNqzBiEwUDifdOIu+02P7TSP46EGit3eQW43RjDI7zLDBYL8ePHg9nMvuuuI23iJIK6dqXDiy+0msxqfSzduhF8zACyn3iClFNO9S4vX72GyvXrEWYz1uHDAYi+6irib78dAFlZEZD2eqjASlFaoKrt28mYdl+jv4XnvfYasqoK6/HHq8CqCbW0AULrI/RhIQ43k5N6m/Zh5S4ra7EZkCOhxspdWgKAISK82vK
Ya66m3UMPAto8i9qwCmHN3r6mIISg4/PPe5+HjRiBKT6e9EmTKFq8mLAzTvfOSxncty8iJAQIfFegCqwUpQVKnziJ4s8/x757d4PbOrKzyXv1NRAC67BhWmDlM5Gt4ketsHj9cAIrd1UVroICAKKv/K9fmtUkjoAaK1eJVpjtm7HyiDz/fOLuvIPuX38V8Ds0/c33eto/8TidP3gf69ChRFxwAe0fexxLp04ABPXojiFUq/dzV6gaqxZJulyk3nwzGE10en1ewzsoih+5iou1f4uK6tzGbbMhLBZKv/0WgK6ff44hOFgbLNGPt9kr+0mn05sJaum8xeuHEXCUr9UmVk5+/XXChg/zS7uagjCZ/VKk35J5MlbGGhkrAENoqLcbrC3q/N5CSn9cjikuDoBOb73pXRd/1ySsJ51EyIABVP7zDwDuAHcFto6/EE3IkZnJnksvo/3Mp7AOHYoQArfdTsG771K+eg2gBVn1TR6qKP7mLi8HtEEZa1P266+k3nQzSAmAuWNHgnv3AvTJbKXUbrNvLdmVVkIrXm8dr+n+4vVD6xZ2ZOeQce80RGgooScc78+m+Z0wGpGHWUvW0nkyVoZaMlZtXeigQYQOGlTrOkNQkDfoN+hdgVLdFRhYJV99pQ37f+NNALR79BEKFyzEtmOHdxvpdKrASmky7ooKpN2OMSoKgKJPPvGOw1JzfBrQBv/Le+llb1AFEDJggPexp+ZAOlpPPVBLYNuxA0NEBObEROxpabjLywnu3bvaNq2qeN2TWTuEjFX56tXsu+FGAKxDT9SC9RZMmEzI8sCOXdTU6stYKRqDqrFqGSrW/QlAyLHHApD10MPY09NJeuZp4ifeCaAm91Sa1L7rb2D7kBORUuKuqCDr0ce865w5BwZWFb//TuWGDbR//DF6/PgDMdddR+y4W73rfQMrpXGc+fnsGnUhKaefgausjJ1nnMnuCy86YLvWNY7VoddYlf/+OwCx426l/WOPNbB1C9CKaqxcRUWk3TkRR3bt2ei6eLLXxti4pmhWmyBCQ4HA11gd8YGVbfcuws85h7jx+/unw4YPJ3LkSAxWfeC7Np5iVgJHOp3ewf0yJk+hYOF7SJuNTvPfxdyhQ9lf124AACAASURBVK0ZK/vefQBYTzoJc4cOJE67l+BevfZvcIiBlXS52D5sOHlzj5yaQnd5OdJup3jJEi0D6HTWOtCgbdcu0iZO0u6MayWB1eEUr9u278DSvTsJkya1imLoQNRY5b76arWBORsr/803KV22jOJPPzmo/Rz7UjHGxLSMAVlbqJaSsWolfyGahttmw5GaRuT5FxA2fDimpPY4MzLBU/Crd/+19aJIpWlV/fsveXPnYd+9m6jRoxFmE9YTT6T4yyWU//6bd7uSr7+Gr7/GlNSe0MGDMSUk4MzNRUqJMyMDR04Oof/5D46MDDCZMCUk1Ho+b8aqkXcGum02Ct55l8KPPsSVl0feK68Qd+sth3/hLZQjO5u028fjzM/HmZenLXQ6CRk8iKCePSlf9Yu2XVoapT/8SHC/vuwePQakJKhXL6xDTwxg6xtPGH0GCD1Ith07CO7XeuaXa+4aK1dZOXkvvUz+vNc5av3fjd7PkZVF/utvAHiHBmgM6XJR9usvzTqxdWskgoLouWY1Rmtgg88GAyshRG/gI59F3YCHgPn68i7AHuBSKWWh0G5D+h9wHlABXCel/Mu/zT48hYsXU/D2O5iTO4LbTcjAgQCY4uO1wErnj7tqlCOblJL0uyd7h03IfvLJWrfr+uknlC5fQd7s2cSPH48wGDDFx2NLSSF/7jxyX3wRgN5/rsORkYE5MbHOur+D7QosW7mS3Bde8D73DLjXVpWvWkXV5s1EnH8+BqsVYTJiiIwkcuQogrp1BbSbWlLOPIuMqVMxxschKytJeuYZIkdeEODWN97+cawOLuBwV1TgSEsj8qILm6JZTaK5x7Gy792jn7jhu25LV6wgf85cDNZQqrZt9y535Rf
Uu58jO5vUW24FtwtXWTnOzEysQ1pHUB8oQghMARxx3aPBwEpKuQ04FkAIYQTSgc+AacCPUsqZQohp+vN7gXOBnvrPCcBr+r8tgjM/n6wHHwLAvmsXMTfcQNiwk6pvpP9fqTYlhKIcAtv27dh37yZhymQq/vqbsuXLvessXbti372buAkTCO7bl+C+fYm5+iqMkZEAmBISKF22jNzZs737lC5fgW3bNiydO9V5Tk+hsW9gZduxg5JvvsUYG0P4GWdgTkzcv27bdhCC3n/9yd5rrwv4BKZNrfKfTRgiIkh65mmEofZqCHP79nR5/z1Sb7sdS+fORE26q1UFVeA78vrB/f2y7dylZed69myCVjUN0Yw1VpX/bCLn6acBMEZHIV0uZFWV1pOcmYEzN5fiJUtJnH4/ZcuXk3HPvd59g3r1In7CePLmzCX/jTewdOlC1OhLqh3fVVZGxdq1FCx8D9u2bYSdcToWsxnrbeOIOPvsZrlG5fAcbFfg6cBOKeVeIcSFwCn68neBn9ACqwuB+VJKCawVQkQJIdpLKTNrO2BzK1q0CID4iXcSNmIEwX37etcJqn/72H9XjaqxaquKv/iC4KOPJqh79yY5vjNX62oKGTiQ2Jtuwl1RwbaBgwg75RQ6vPA8ruJizO3aebf3BFWgTXirHcRJl8WLSLvjTjKmTgUg8pL9f6wPUCNjlT1zFgXvvONdXfD2O7R/9BHK//gDU3Q0JV9/jaVTJwwhIRiCgpBVVf649Bap6LPPKfroI8LPPLPOoMojZMAAev36SzO1zP8aW7zuLCxk13nnA9p4QZ47ooN6tJ7ACqOpWUo2pMPBvptuwq2PM2eMjCL3xRfJf/0NjLGxuPLzvds6sjKpWKONAxY1dixhI04m9PjjMUZEULBgIUhJ5vTpuEpLqNqyBZxOpJRU/r0epz7pcMjgQST7fLFSWoeDDawuBz7QHyd6giUpZaYQwlPw0QFI9dknTV9WLbASQtwC3ALQqVPd3779SUpJ4UeLsA4dWu88V55BFQ/nrhql5StfvZqMe6cB0GvdH00yDYRnPCrPjRCG0FB6rlmNwWrFYLF4iy1rE33ZpZiTkrB0Sia4b1+i//tfcp9/nqCePYi8sO5uGt+uQHtaGgXvvEPoCSeQMHUqtpQdZE67z3srPQAmE+2m36/tGxxc76CkrZV0u6lcv57M++7DYLWSMHVKoJvU9HxKGXKef4GyFSuIn3gn4Wec4d2kbNUqyteuxVVYCEDFunXYd+9BWCxYOrWeeh5hap6MVcVff+MuLibp2Wcp/e47bCkplHy3DABjVBSxN1yPKS6OjHunUbFmLSIkhIRJE4kYNapaF1Xo4MHYd+0CIGfmLIxRURj19ZaOHYkbfzuVf/5FbBuudWzLGh1YCSEswCjgvoY2rWWZPGCBlPOAeQCDBw8+YH1TsO/ZgzMrq+6gqmZ/uQqsWiV3RQV7r7sec7t2WIedRNTYsQeMQF7x55/su3n/H638ea+TcPdd/m9LWRkABuv+oK2xNQAGq5WIc/an/mNvupGQo/sROngwop5xhTyBVcH8BZR+9x0A8ZMmEnJ0P4L79cWZnYMpIYGwEScjHU6MUZEYgoK0cwYH4awK7B01/uauqCDl7LNx5eaBECS//rp3Goy2zJNxd5eWUjB/PrKqirQJdxB7663ET5pI5oMPUvxx9TvT8ue9jiMtjaA+fVrP3Y94aqyapmfBtmsX7pISLD16ULlRuwswbPgwKtb9geuPQoyRkYSffTYd//fi/p1MJoQQhJ1xRq1jgLV7ZAbtHnyA8jVrsKXsJPqyS/ffha6LHju2Sa5HaXoH8z/nXOAvKWW2/jzb08UnhGgPeAblSAN8v+p0BDIOv6mHr+L3PwAIHVz7CK771chYqRqrVqNgwUKKFi3CtmMHVRs3UrpsGUgQFgvO3FxCBw0kdNAgsh59DGEw0Pm9heS+9DL58+YR1KsXkRec79f27M9YhR72sYTBgHXo0Ia30wO
rkiVLMMbEkDRrFqH/+Y+2Toh67/gTQcG4q9pWjVX+W2/jys0jZPAgOjz3XLX6srbM8/er9IcfkVVVdHxlNmnjJ5A/dy6yqrJ6UGUygdOJIy0NhCDq4osD1OpDI8wmXMXFZD35JO3uv9+vx/Z0kyIESIkIDsYYGYkxOhpXURGuoiKsJ1Wv0408v/6/I0IIMJsJO/lkwk4+2a/tVQLvYAKrK9jfDQjwJXAtMFP/9wuf5ROEEB+iFa0XB7q+qnLzZgoXLKTy778xd+yIpVu32jeskdXwfmNzqcCqJfIMQ1Cxbh0R554LBgPZs2aB00mHF54n/Oyz2XrMsWQ9/LB3H1NiIvGTJmHbto3EBx4g5Nhjaf/Uk6SMOIWK39Y2QWClZaya8/Zf32/I3b/9BmNE46fAEMGBr7Eq+vhjipcspdM7bx+QabTv3YshIsKb9XMWFmKMikIIgZQSV0EBpthYpJTYd++hcsMG8vQaleQ5c5qku7el8nQzl37/PcaYGMJGjCD5zTdIvfkWCt6dD0DYaadRtnw5htBQ3CXayN69fv8NY3jrGt077NRTKf/9DwrnL8CRlk7owP9gTu5ExNlnHdZxpc/sBp6ZDjyDSUePHYu02yn5cgnWFjyPotL8GhVYCSFCgTOBW30WzwQWCSFuBPYBnrzl12hDLaSgDbdwvd9ae4hcBYVU/P47Ukpib7yx4YlpVY1Vi1aw8D1sKTu0epCUnQBk3DsNS4/u4HSSNGumFmgB1uOOo3z1ajq+Mht3ZRUZU6aQed99CLOZ8DNOB8CcmIh12DAq1x/8YH8NcZWVISyWervu/M5nypWDCaoADEHBuAN4V6AjPZ3MBx4EIOPee0mcOpWc554n8uKLcZeXkXb7eIKOOoouiz5C2mzsOHEo1uHDCT/rTAo/+ADbln9JfuMN7Lt3k/3EE97jJkydekQFVQCm9u1Jeu5ZbFu3Enr88QiTibCTTuKojRvYenR/AIL79dUCK6uV6P9egX3v3lYXVAFYhwyhy8IF7L32OsqWL9fuvjUaidi86bCOW7L0K0AbH6nHTytwFRZ6u/XNSUkkTp1Kon5DiaJ4NCqwklJWALE1luWj3SVYc1sJjPdL6/wkbPgweiz/seENVY1Vi+cqKSH78ccBsHTpQuytt5I/dy6AN8gK8pnfrf1TT+HMzSXk6H7at08BzqxsIi+6EFPs/re0dehQcp5+mh2nnEroccfR/pEZGEJDKfr4YxwZGURfeWW17RvLXV6OoZk/0D0jM0dfc/VB7xuIjJU9NZWsRx/DVViILSXFu7zkyyWUfLkEgOLPP/cut23dyrYBx3gH6ixftYryVasw6HdUpt6kzfsZcswxBPfvjyMzk5jrr2umq2k5hBBal1SNbinf2qnQgQPBaCRx6hQizjuvuZvoVwarla4fL6Zg/gKyn3yy3i8VZat+oWzVz0i7ndjrrsPSpcsB2zgyMrx34Xb/+itM0dEtYowkpeVrPdWJzcmbsfLcXaUCq6Zg27mT7KdmEn3VlYSfckqj9vHM8N7+iSe8479E//cKQPvAKF22rFpgZU5MwJyo3bDq/aCpReSokRS88w7CaKRkyRJsKSkkz53jzZ4UzF9Ar9W/HnTmyV3W/IGVpUsXun7+WbXXobEMwSFIux3pdoMQDWd3/aD0hx8pX7UKY0wMsqoK60knkTBlMllPPIFt+w5vF1Wn+e9ijIqicMECij7/gvLVawDosngxpphojLGx5DzzLIXvvYele3c6vPwS5jpGpz/SCbMZ6XBgTk6mz2FmdVqamGuuxlmQT/7rbyBdrmoD6Uq3m8L33q+WzXSkpdPpjdcPOI49NQ2Ajq/MbhXT+igthwqs6nGoIxcrjVO6bBnlv/yCITys0YGVp2bJN1jxLUaOvuKKQ2qLKS6Onj+vBCB9ylRKli4l5eQR2rr4eJy5ubjKyzEdZGDlKi3BEIC5vYKPOuqQ9hPB2t2Buy++BGdeHsmvzCa4f/86R3n3B9v27Rj
j4uj+7bfkvfoqEWedSXCfPnRZuBDQBvWVLpc3SGr/2GPE3norBW+9hXS5Cel/tPdYsbfcgruyksR776k2JphSXdjpp1P67beYYmIC3ZQmYYqPB5cLV2Ehpjht0mLpcFC89CtvUGUdNgx3WRnO7Kxaj+HM0kqDm2qMO6XtUoGVL8+3c0/GyjNXoOoKrJV0u8mZNYvyNWsJ6tGDhKlTMLVrh23bNsxJSfWm4p25uZT/9rt2nIOYidxdqmWsjOFNlwVKeuZpSpYu9T6PG387WTMewV1eAQfZFeDYs/eQMkeB4i7T7mK0bdsGwJ7LryDxwQeIufLKJjtn1ZYtBPfqiTHMSuI9B9ar1NYFa+nYkXYPPXTAcnNiAklPPnHAcqW6pJlP4bjzDgyhh3+3aktkio8HIPd//8NVVoY9ZSfu8nJtnk0g4rxziZ84kbx587xzQ/py2+1UbtioHctnAF9FaQwVWNXnEKeEOFI49u3z3l1k276dku+/xxQXhzMzk5CBA+m8cIF3dOvKjRux791H8RdfYN+9W/sDp99l49JHMW4Mlx5YNWX3mhCCbku+ZNfIUVg6d8YYFQXsHzqhPo6sLKo2bSJk0CCQEntqKuHnndtkbfW3sBEnU7Z8OR1nv0z+m29RtGgR5at+aZLAylVcTOkPP2Lbto3IafWMJK/4nSE4mKC67o5uA0KOORZzp04ULf54/0IhSHzoQcJPPRVz+/YAGCMiceldzaDdBeguryDjnnu8d0sagoObu/ltUrnNiRAQamn7YUfbv8KDUaOcZP8kzE0/VUJr5MjRhi7r9PZbmOLiKPzgQ5x5eTgTE6n86y8KP/wQc1IS5b+upnDBAkArMA077TTCzzoLQ2goJd9+e1CBlSejYghr2juXgnr2pMePPyBCQ6napNWguCsaDqxSTjlVe6CPeQMQ1KNHk7XT30IHDqTbki8BaP/oI0i3i9Lvf0BK6dd6K+l2s+/6G6jasgVTu3atbtwkpWUzJybQ/aullK/WZjkwxcVhCA8/IPtpjIxAVlXhttvJeuhhylau9I5CDxDcr19zN73NqLS7+OD3feSV2Vi1I48dOaV0jQvji/EnYTHVP51Ua6cCq9p4egTN9d8V6K6sxLZrF8aICCzJrWf6B39x5uQC2mTBQd270+4hrdBb2u3sPO98sh99zLtt1NgxRF16GZbkjt4MEIAzP4/SZd83+pzusqbvCvTwFKx6ukvcFRX1bu/My9v/RA+qjJGRhJ9+wM2zrUZwr94Uf/wJrrw8b/dKbQ428Kr4/XeqtmwhbsIEoq/8r6qHUvxOmM2EjRhR7zYGvVzBmZGh3Xnqc8dk1NixxI2/vUnb2Fb9ubeAKYs3sjuv+pfRfzNLWJ9axPFd22Ztn4cKrHwcMAmzqfbidelwUPXvv+y57HLvB2ivdeu8t7kfKZx6xspU484rYbHQ9bNPqdq8BUNwEKakpDrvzjJGROIqKCDz4RlYkjsSNWZMtcCrJlfpgcXrTc0bWNXTFSilJOWs6jPPx950IzHXXtuquxKCemiFu5UbNlSbY66mzGn3UbZyJb3WrmnUcUt/XI6wWIi94fo2W+ejtHzGCC2gL/t5FaAPIhsZQdmKFcTdcUez3BXbllQ5XDz//XZeX7WLpMgQ3r/pBI7tFMX7v+3jmOQoxs5ZQ3ZJ253k3UMFVrXw/mfy3Anlk7Gq2rqVPWMvRdaYl8qZmYGxZyuaDd4PbCkpiJCQWoMcY3g41iEnNHgMz/AFRR99BEDOs88ROngwkaNHE3XxRd7tpMtF1ZYtVPzxByI0FNGMwYpnDq/6MlYlX3+NrLE+7vbbW33QENSnDyI0lOynnyF08GByXniRoN69iL78cm/9nLuqiuIvvmjgSPu57XZKvvqKsBEnt/rXR2ndPF/isp98EkNYGKH/ORaD1UpI//4Bblnr8/e+QqYs3sDO3HL+e0In7j+vD2FBWohx0/BuFJbbAVRgdcSpOaWNufo4VlJK8t94E+l0EnH
++YQefzwVf/xBydKlOLKyCDoCAqvytb+R/dRTuEpKcGZmEnX5ZYf1rS768sswt29H5EUXUbhwIaXf/0DFunVUrFtH+Gmnkv/OO1SsWYszPx9HaiqgTSjcnN8ka+sKdGRnY0pIQAiBq6yczOkPANDtm6/Zde55YDK1iaDBFB1Nu+nTyZw+ne1DTvQudxUWEj9+PPnvvEPOzFmNPp4zL499t9yCq6CA6P/+tymarCiNFjpoIJZu3bDv2kXM9dcdMBGy0jCb08WLP+xg7sqdtIsIZv4Nx3NyrwPLBqJCzViMBnJL29ZcpLVRgVWtap/SpuDtdyhZupSQY4+lw3PPAtqo7iVLl+LI1MY8cWRng9vtveukLcmbM5fcF1/E3LEjIQMGYBxxMonTph3WMU1xcUSNHg1AzLXXEnXppaTfPZmyFSvYed75uPLzCR4wAGGxEHPttYSffTYh/znWH5fTaJ4AybF3LwDFS5aSMXUqHee8Rvgpp5D10IPaJLezXyaoa1c6vfsuwb17NWsbm1LkqJHYU/eRP0cb4T64Xz/KV68h9sYbyZ/3OsH9+2PfuRN3RUWDtVa5L8/Gtm07idOnYz3xxDq3a8uklCz8bR8L1uwhxmrhqHYR/JqSx11n9uK8/m3v70ZLZggJocuiRVRt3EDokCGBbk6rszGtiCmLN7A9u4zLBicz/YI+RASba91WCEF8eBB/pxZRaXcRYql7bLy9+eV0jm29Qa4KrHzVMQlzydKlFH/5pXfOLU9QBfp4KQYD9p07ybh3GsVffIEpPp7Iiy/GEBZG3C03N+slNBX73r3kvvgiAEmzZhI6aFCTnMcQEkLya69SuHgxea+9RtTYsbR79JGA1jqIkBBEaCgF787H0rUbWTNmAFDwxpvatCtff0PEyJHeGiTrCccHrK1NQZjNJEyaROhxx4FbUvLNN5SvWkXea3NwFRTQ8X8vUvH3enKffx5ptyOCgmo9jpSSsp9/Jvz004m5+qpmvoqW4+b5f/LDv9mYDIKyKidrdxUAcPt7f/HPjLMIr+ODSWkaxjAr1qFDA92MVsXudPPy8h28+tNO4sIsvH39cZzau+FZDoZ2j2Xxn2nc+8lGxp/agzdW7aJ7QhjjRuwfhHXRH6nc88lGFo87keO6tM4idxVY1cYzQGhQENahJ1K15V9cRUWYEhNp98iMandHCbMZ65Ah3vGcQBv8Mn/ePIA2EViVrfqF9EmTQAg6vvxSkwVVvqLHjiV67NiGN2wGQgg6v/0WaXfc6Q2qACrWrfM+tp7U9v8wh510EgBVmzdRnJtL/ty5RF5yCSGDB1O1ZQuANs9gHYFVxZo1ODMzCZswodna3NI4XW5W7cile7yVbyedjFtKxs5Zw8Y0bciR/jOWcXSHCBLDg7nz9J7klNr4eXsuo45NarUfMkrbsim9mCmLN7A1q5TRAzvy0AV9iQxt3JeBJy/pz96CCpZszOCHf7OpsGtDGV1/UheCTFoG65tNWu+PpyarNVKBla+aGSuDgU5vvYWUkorffifk2GNqvcMr7rZxlK9ejQgJofs3X1P+62oyp09vMzVXOc89pwVVr8wm/LTTAt2cgAg55hjaPfwQaeOrBwVJzz5Lxj33YD2CuhFCBgwAIOqyy2j34AMIIbxZKrfNhifBb9uxg10jRwGQeP99ZD/5FKak9kRcUPt8jUeC2StSsDnd3Hl6T8xGrfj/ywnDcLslX/2TyZpd+WzOKOHPfYVc+Mqv3v32FVTw7g1tKxOqtC4Ol5tXVqQwe3kK0VYLb1wzmDP6Jja8ow+z0cDTowdw4Su/UlzpoEdCGCk5ZeSW2ugYHYrD5WbFNm0Yn8iQ1pu5VYFVbWoGWELUe4db6HHH0fPXXzBGRyMMBqJGX0LpiuU49qU2dUublHS5QAjse/YQffnlR2xQ5WEdPpyIkSMJPuooIs4/D4QBc2ICkUdYoGAdOpTeG9Zj8MlMCYv2WNr3f8vMnf2K93H2k09h7tyJji+/XG2
/1sjudB/0AIdSSr7ckMFrP+3EYjIwtHtctfUGg2DkMUmMPCYJgPWpRUxetJ4rju/E41/9S1QjMwKK0hT+zSxhyuINbM4o4aJjk5gxqh9RoQc3b6pHlzgr30wczvKtObSPDObGd9cxbNYKOkaHVMtSuaW/Wt/8VGDlJzVH9BVmc6ueYzD3pZfImzOXsBEjkFVVWLp2DXSTAs5gsdDhmacD3YwWoWZwZNAnb5a2/Xf82Pfs8T4WQUF0nj+/2oTZrZGn/uPBC/py1ZBO3u6LhqxPLWLih+sB+OHuEcSH1x9cHpscxY+TT9HOuS4Vu9N9WO1WlEPhdLmZs3In//txB5EhZuZcNYhzjj78uROTokK4akhnNmfsn3WjU0wo3eLD+CetiMIKB27ZeiMrFVj5qjEJ82EdymQ+YKyr1qT0+x/A7aZsxQoAgvv2DXCLlJbM2xVYVUXFX3+T9fBD2HakkHj/fUSMHIkhKKhNDD+xO18bJPaxpVvYllXC02OOYX1qEd9sysTmcDP+1B7Ehwexdlc+mcWVvP/bPiKCzSREaK/P13cOp0fCwQ1uG2QyqsBKaXbbs0uZsngDG9OKuWBAex698GhirIeWpapLYoRWWhNiNvL+zVo5xbo9BYyZswZXK05ZqcCqiQiTqdVlrLRZ4FPIfOhhbDt2EHfnHUSNGQNQ58jpigI+XYE2OzmzZmHbkYK5QwciLrgAU3R0gFvnX0aD4IIB7fnkr3TO7NuOKYs3UFypfYn6Yn06px2VyCd/pdW6b5/2Bz/HZZDJgE0FVkozcbrcvL5qNy98v52wYBOv/Hcg5w9ommFAYq0Wpp17FOf0258FMxi0xIZLZazaCD/e0S/MrStjVfLtd9qdfwBmMzHXX0/UmDEqoFIaRQRp32Tte/ZQuWEDUVdcTrv77/cOsttWOJxugk0G7jy9J0s2ZHDzfO3O0HvO6c0JXWOZ+c2/3qDqqUv6c0LXGPYWVLByWy5BZsMhDRtiMRmwqYnglWaQklPGlMUbWJ9axDn92vH4xUcTF9Z0NZFCiGpDLQAY9f8jbpWxamP8EGAJkwlaUWBVtXkTAB1eeJ6g3kcR1E3VVCmN57lbNvP++wEI7tOnzQVVAHaXG7PJQPf4MJbddTJphZWs3pnP5cd1IsZqYfG4oezNL+ef9GLO798eIQTd4sMaNcZPXYJMBspsrSv7rbQuLrfkrV9288yybYRajLx0xX8YOaB9QMYPNHoyViqwahv8+SZqbcXrjswszMnJRJx7bqCborRCNQcFDT/11AC1pGk5XG4s+jAJPRLC6ZEQzik1gqbOsVa/jhodZDJic6iuQKVp7M4rZ8riDfy5t5Az+iTy5CVHkxAeuInjDZ6MleoKbGP8UbxuNrWYrkBHdjbFn32OMSqSqNGja80kOLIyW/0dW0rgeCbTBug0/91qg+i2JXan9I4/1VyCzAbsLhVYKf7ldkveWb2Hp7/bisVo4IXLjuGiYzsEdJYLAH1udzXcQtvhxzdUgDJWjowMzEnaWDi2XbvIe+VVbNu3YduRAoCzoIC4ceMQhv0fDpUbNlC1eYt3ShZFOVjGqCgAoq+5GuvxbXcgS4fr4MewOlxBJgM2h6qxUvxnb345Uxdv5Pc9BZx2VAJPXdLfe4deoHlqrFRXYBvjj4hdmEzgdiNdLoSxcWPdHK6yVb+QevPNhPznP0i3C3dJKfbduwGIue46qv79l7yXXqZw4XsghHfAz6KPPwajkciRFzRLO5W2xxQdTc9VP2OMi2t441bM7nRjNjbvN/ogk1HdFaj4hdstWfjbXp76eismg+CZMQMYM6hjwLNUvjx3BaquwLbCnzVWJq27TTqdzRJY2XbtIuvRRwGo/PvvausSp08n+vLLcKSnU/TpZxR/9hnOvDyKFi8GIPKii0iYfHeb7b5RmseR8P4JRMbKooZbwO50s3pnHhtSi8koqmRzZjGnHZXIhccm0T3+4MYFayucLjdbs0pJigpp1PhSqQUV3PP
xRtbsyufkXvHMGt2f9pEhzdDSg6MyVm2WP2qs9MDK4ahzUlp/se/dy66RL2KsmAAAIABJREFUoxAGA8mvz8Pcrh3GmBhyX3oZ64lDiDjnHAAsXbqQcPddJNx9FwDpd0/GYA2l/WOPNWn7FKWtsLvczV9jdQQPt7Biaw4vLd/B3/uKDli3Kb2El37cQXiQibBgEzNHD2BEr7Yf3APkl9m488O/+TUln27xVpbro/TXRkrJ+7/v48mv/kUIwcxL+nPZccktKkvlS90V2Nb4NWOlvbQHW8AupUTabLVO9lyXgvkLwOUi+c03q81p2P6RGfXu1+H55w6qbYpypHMEJLAy4nBJ3G7p7SZp60qqHBRXOHh79R525pQRHWrmxO6xzBjZj9iwIATw/u/7eODzTZzWJ4Ev1mfw977CNh9YLducxdyfd/H3vkLcUgu6d+WWk19mI7aW8abSiyq59+ON/JKSx0k9Ypk1egAdo1v2DAiqK7Ct8keNlUW/866eAna33Q4uF4YQLR1b8ccfZEy7D2d+Pr1+/w2DpeH0rruqiqLPPiPywgvrnShaUZTD53BJQszNUzPp4el6vPPDvxkzqOMBwzu0JUUVdl77aSdzf97lXXblCZ144uL+B2x71ZDOXPyfDliDTHy1MbPNT/sjpWTap/9QaXdx2lGJ3DRcG2vw8nlr+XtfEWf0Tay27aJ1qTy29F/cUvL4RUdz5QmdWmyWypd3gNDWG1epwKqpNCZjtWf0GGw7dtBn679IKUmbdBeu/HwA3KWlGGpM7Fwb286dyIoKwtrouEGK0pLYnW4iQ5p34FPPZ+HSjZks25zNXw+dSVhQ2/vT/dEf+3h86b+U6oOh3n1mLzKLK7n+pC517mPVX4cjYdqfogoHBeV2HrygLzcO04KqKoeL8CATN81fx6e3D2Vgp2gyiyuZ9sk/rNyey5BuMTwz5hiSY1p2lsqXJymrugLbCn9Owmz+P3t3HhdV1T9w/HOZgWGVHUQUwSUXVhEVt9zR0sxKMzNbrGw1y7JFy2zxycpsfazHSu0pU3sybbNSc/u5g4obIoggCIrs+zrc3x8DEyggIJv6fb9e5cyde++cO8DMd77nnO/5Z/B6TYqiow37qCrFsbHo09Kw6NWLgsOHUQsLr/gc2Zs2cWHBGwDouna56jYLIWpn6Aps3m/99/TpQEcHS9pYmDL1q/08tOIAHewtWTI5oFnb0ZT2xqTx6obj9PKwZ/64nljrtHg61b3IqpnW5JrJWEUl5xDy4U5+mzkIH3fbOh8XV74AuKfjP0GSuamGx4d25v2/TjHjv2E8N+omFv0RSale5Y3x3kwL7njNdR9LV6CoWT3GWOXv20fyoncBsBo0kILDhykrKqr1mNydO0l8ZhZgKM5o5uFxlQ0WQlxJSwxet7M04xZfNwpL9HRytiI0LoPQuAxu7+VO747213z2Sl+m8vwP4Tha6fh0Sq8G1VMylKS4Ngb4L9kUBcD/RafWK7A6m5YPcFlV/6eGdaGNhSmvbTjOvPXH6eNpz/sT/esVmLYmMivwetOYizBrq2asCk+eJHvjRsw6dcZq4AASZz1r3Df+oekAtHt3ESbWhqnDZQUF1Z43c/0Gsn//naKYGEw9PHB7801MLMyvy3XZhGhtKi9p09zMTTVsfX4oGXnFjFiygweWH0BjovDbzEH0cGvTIm1qDMt3xZKUVcgX9/VucJHKlshY7TuTRmdna5xtap/1raoqRaVl6LQmhMZl8OeJCwBY6eo3Vi8uLQ9FgQ4Ol5dImBDQjoT0fPp4OjCiu8s1l6WqzERmBV6nGmMR5oquwOIS9Lm5xN49udZFmR0eno7t7beTu2u34bgaMlbnX3kFAI29PW4LXpcB60I0kpScIizNNFjptGTkFVNYqr+szk9xafPXsbqUvZUZPz81kHWHzvHRlmjiUvOuycDqQlYh838+zqaIZIZ2c2a0d8OX1NJpm3fZn+LSMu5Zto/ubW3489mba9135Z4
43vg14rLtGXn1mzF+Ni2fdrYW6LSXB2Q25qbMvbVHvc7XWmmkK/A606iLMBte2pQPl1CSfBFKSrC/915ytm6l9MIFbG4Zg2Vgb0ysrDg/dy5tRo0CwMTc8O2nujFWpampAFgGB+OxYvk1McNDiGtBVkEJQ9/fhrW5lh1zhhG0cAv6MpW4RWOr7Feib/61AqvTwcGSyX068NGWaDILWseapPX1r40n2RSRDMALId2u6v3MTGvSrAtVx6cbuuUiL+SQmlvEhaxCTBSFY4mZXMgq4ucjiax6pB//2XGGlXvijMdNCGjHtP4deXB5KJkFxVd8nrjUPMpUlezCUtYfTmRA5ytPaLrWyazA61UjBCza8qU98vbsxaxzZ1znv4bDvfdif9995P3fTuwmTcLE0jAI0WbkCDRtDN84FXPDN+SywqoZK1VViR40GIA2Y8ZIUCVEA4UnZFJcWkZfLwfjtt+PnievWE9esZ6b39tm7IYoKtWj02o4n1XAlztjyS0sbRWBFWCcnZh1DQZWqqqyPzaNod2cWfnQ1a8t2ZwZq8ISPZsiDN151jotr64/buzeq6z/O1uNt58d2ZV7+3rgbKNDURRsLU3ZHJHMOD83end0uOxYgEPxGdy5dE+VbYO7Xt91uuCfj1/pCrxOKI04yMq8Rw+6bN+GibUNGut/BhHqOnmh6+RVZd+KoAoqZayKCklftYqUJR9i2s6N4rizlfa3abR2CtHabIu8yJaTydXWLrpavxxJ4pnVhiWfennYEZ+Wj1ajkJxdRA+3NrjbWbD7dKpx/70xadzkasOdS/dwMacQO0sz/DvUfcBxU7Iw1WCmMSEz/9oLrJKzi0jOLuLJoY1Tk6u5MlaqqjL+s11EJecCYGOuNWav7g5qz7RgT2wtTLn5/W0ADOvmzNxbe9DZ2brKuCcrMy2nknO46/O9rJkRzNFzmZToDYFEQbGexMwC1h9OxMJUw8I7fDDVmDCgs2O1RUCvN8auQAmsri+NlQ0ybdu2/s9dXnE9e+Mf5GzeDEBR9Gnj41pXVyz7Xv03PCFao6yCEh5aGQqAh4Mljw3pTFZ+CbaWhuzMh5uj6NbWhlt93VBVlRW74xjt0xZ3uyuveXb0XKYxqGpna461Totfe1s0JiYM6mLKtP4d8W9vS2mZSmJGAUMXb+fBFaF0cbHmQnYh88f1ZPogrys8S/NRFIU2FqZkFRSzNTKZlJwiNkdc5N5+HRjeveHjlZpDbKqhdEBjrfNnptU0S+YuKjmXqORcpg/0IjQunWOJWVzILmRCQDvem+hv3O/PZwdjYaq5bAZfhfb2FpxKzgEMBT5rckegO3cGtm/ci2jljLMCZYzVdaIVdK+ZlK8rWBFUWQ0ahGXfvmSsWoXL87OxHT++JZsnRKNLyy3is22nycgrJiYlz7j9nT8iCY1LZ8vJi3Rxsca1jY7dpw0FdF8c040ebm1487cINkVcYM2M/jWev6BYT8hHO0hIN8y0XTMjmOBONY9VMdUoeDpZ8eX9QTz+3UEKS/S8OKZbqwqqKthZmhKRlM3qAwnGbbtPp3J4/ijMm7lCfH1U1GTq6Ng4hSt1WhOKSpqm3EJhiZ7SMhVrnZb9sYbfv4cHe+FobcaxxCxUlctmBnZvW/tkgo+n9CIxo4Ddp1M5npTFU8O60N7egsKSMl7dcJxpwR0pKNHTz6v6bsLrmYlkrK5XLRdgKZesEejywvOYd++O04xHW6hFQjStv04ks2J3nPF+UEd7pvXvyEvrjrLl5EUATl/MRWuiGCtsv/fnKeP++cW1f6B+uy+OhPQC/NrbsnRqYJ3XShvV05UTb4xu1QGKs7WOvWcMH/YWphqm9vPgq12xBL/zNyaKQv9Ojvx7amALt/Jypy/mYqYxoV0dMo11YdaEY6wmL9vHkYRMene059i5LNqYa2lna85DAz3p6mLNr0fPc3uAe73Oaa3T0q2tDd3aVh3WodNq+HRKr8Zs/jVJY6JIxuq60YoyVgDdI06
gmLSOgbJCNJWz6XmYaUzYPmco838+zuQ+Hozq6crtAe6k5BSx5kA8ecV6Xr6lOwCRF7JZG5pgDMYqCmSqqkpesR5rnRZVVTmfVciRhEz+tTESgG8f7lfv5Whac1AF8O5dfiRk5OPX3hYbc1PiUvP4alescdxVRYaltSguLSM8IZPfj56nj5e9cTzN1dI14RirIwmZABw8mwFgHIBuaaYlxLstId71H/IhaqdRFJqxekajk8CqOi0ZYFUq9ClBlbgRxKfl097BgnZ2Fnz1QJ8qjznb6Jg5omuVbd3btmH+uJ5M7deR+77aT3x6Pj+HJ/LfvWc5FJ/B948EE5uax9z1x4zHzBrRtdnX+GsOHo6WeFTqTvNwsKSXhx0Te7fn5Pls/jh2+Wy1lrRyT6wx0J07tvHqLjXVrMDEzH8KNf82cxAHz2bQ1bVxxoWJmimK4YvStUoCq1ZGURScZj6NVf8BLd0UcYNRVZWX1h2lr5cjE3s3z4BZVVWJSs6hYz0XiVUUhS4u1twR6M7n22OYtSYcSzMNqgpTvtyHxkTB192Wri7WpOQW8cwlwdn1ysREYf2TAwFY8MsJSlvROJWDZzOMQdWW2UPo4tJ4AYpOqyElp4gNhxOZ0Kt+3XI1Sc8rZtji7QD89OQAfNxt67UEjWg4jYki5RauG424CPPVcH7qqRZ9/uaiqipxafm0t7doNbWBbkT6MpUtJ5P5bt9Z/i86lR8PnmuWwCo5u5BfjyQRk5LHgwM8G3SO50fdxF2B7QEV1zbmLPojklX743G3s+CLab3rNFvwetWaPpxUVeXNX08AhjF0jRlUAfRs1wZFgWfXhpOUVcDAzk6Ym2ouG8NUHxFJ2RSXlvH8qJsI9LBvxNaKK9EoMsZKiAb57eh5Zq4+zMODvHhtXM+Wbs4N6/+iU3js24PG+ze5Nn2dNH2ZytSv9nP6Yi5eTlYNzjJoNSZVPqSfGtaFrIIS3rrdB3srs8Zq7jVJa6JQWtayA1UKS/T8deICH26OIi4tHx/3Nnw4OaDRn+fuoA7c3NWZUR/uKJ/YcIp2tubseWVEg85Xoi8jNC4dgHv6ygL3zc3ERJFZgdeNVpKxulEcjjcMCk3KrH7BadE4MvKK+XpXLH+euICTtRkPDvBijM8/A25PJGUDsPX5IXy+PYZNEcks3X6apMwC3rrdp0mq/O86ncrpi7m8OrYHU/p6YKVrnLeidnYWfHZv65sF1xJaOmN17FwWt322CzDMWJw+0ItXx/ZosgWC29qas2POMB7/9iAH4tJJz695yZi41DxiUnLJKighxLttlQkQxfoyXvjfUX49koSFqQYn6xs7QG8JMitQiAYoKNbz3T5DNfmcwtIWbs317cEVBzhyLgtFgfOZGp5de5i/3G4mPCETe0sztpxMxt3Ogk7O1liba8kqKDGWM/huXzzfPty30ZfS2BxxAQtTDfcFd2z1M++uVRoTpUXGWG06cYEVu+OomHtz803OLLnbH6dmqBruYGXG2seCWfRHJCt2x6Gq6mVfDI4kZPLc2nDOlBcpHe+fwidTejFv/TG2n0qpMmDd0MUoX7Sbm4nMCryOyN9Ps9lyMtk4i6cui5GK+iksL5a4NjSBI+eymDm8C7NH3cSB2HQmL9vHkPe3V9n/8SGdAejqYugGfG7kTazaf5aLOUX8HJ7UqIFVQbGeX4+cZ0QPFwmqmpDGREFVDYUWmypLVJ2fw5OMtbVCerqy7P6gZntuMExscLbRUawvI6ugBDvLfzJO0ck53P7v3cb7jlZmhMalk1dUyvcH4qmcJBnn58ZLY7o3Z9NFOY3JDTArUFEUO+ArwAdQgenAKWAt4AnEAXerqpqhGML7j4FbgXzgQVVVDzV6y5uSBFhNLjm7EICRPVyIvJDTwq25/jz9/WG2nEwGoI25lil9PVAUhSBPByb1bo+jtY7gTg6YaUxwstHRtXyc0uQ+HRjazZl2dhY8NawzT31/qFFrISVlFjDly31kFZQ0eMC
6qButyT9Lg5g045ta5S9K4wPaNdvzVubSxlBo+e+TF8ksKKGwRE/khRx+P5oEGBaw/uqBIA7EpvP+X6fYfToVVYUVD/XBRqflrxMXmHtrD8lWtRBDxuo6D6wwBEp/qqo6UVEUM8ASmAv8rarqIkVRXgZeBl4CbgG6lv/XD/i8/N9WT/6Imk9KbhFmWhPa21uy70w6pfoyNCZKo/wMikvLeGb1YdzszLn5JmeGdWuchV5bO1VV+evEBU5dyDUGVU8M7czzo25CWz7rUmOi8P4k/xrPoTFRjNWwtRoT/Nrb8deJZHKLSo3jUK7Gks1RnM8sZMnd/gR53njLdTQnTXlfnL5MpTkTg2m5xQzs4si8W3vSw61lFozvUT4b8Pn/Hamy/Y5e7oz3b8ew7ob3hLRcQxD4zJrDWJlp6OPpgLVOK7+bLczkep8VqChKG+Bm4EEAVVWLgWJFUW4Hhpbv9g2wHUNgdTvwX9WQx9unKIqdoihuqqqeb/TWNxEJsJpeSk4RztY6tCYKuUWldJn3B2AIBJ4Y2hkbnbban8PBsxnsik5l1sjq6xL9fvQ8q/afZU+MIcuyYnccy6b1xkqn5WJOIX7t7Rpt4dfWZtX+eF7dcByAgA52fPNQX+PixQ1V8VrFXMzFv4NdjfsVFOvJKSrBxca8xn1UVWVHVAoh3q433MKyLaEiY9Xc46zS8ooJ6GBHz3a1r5fXlLq62nBg7gi+3XcWfZmKo7UOT0dLhnd3qfK+MqirEx0dLTmbls9Twzo3ypcHcfU0N8CswE5ACrBCURR/4CAwC3CtCJZUVT2vKEpFWsAdSKh0/LnybddAYCUBVX3oy1SWbD7FvjPpdHWxZs7objha60hIz8fR2gxLs5p/vVJyinCyNqOXhz0Qy8Te7dlwOJHPt8fw+fYY/Nrb8svTg8gpLOFsWj6Z+SX8dOgcPx1OBAyLoF76JliiL+O5H8IpLi3j1bE9mBTUgaC3NzOjUimBQV2c+O6RayKBWm/bIi/ibmfB788MwtbCtFG+IHQv/+b/0ZYoVjzUt8b9bv/3LqKSc4lbNPayxwpL9Jibathy8iIpOUWM6ul61e0SV1axXIxe33wfUKqqkpFXjEMrKHXh0sac50O61bqPtU7Lz08NJDQug+Hdb4zM9rXAMCuwpVvRcHUJrLRAIDBTVdX9iqJ8jKHbrybVvZtf9hIpijIDmAHg4dHa6oRIgFUXu06n8u9tMZhqFA6ezeDn8CTc7S04fTGXgV0c+e7hfsYP96jkHM6m5bP+8DliLuZxKjmHcX5ujPVzI8T7Fkw1Jrwx3pu9MWk88t8wjp7L4ufwRBb9Ecn5rMLLnjs+Ld/4jXj1gXgW/n6S3CLD7MKP7wkwLora060NR85l8ebt3vx29DxZBSXN9Oo0v2OJWQzs4lRlsO7V8nSy4oH+Hflm71meXXOYiPPZ9O5oz2vjemJppqWoVM+JpGyiknMvO/Z4YhbjPjVMt581oisf/x1NJycrbvV1a7T2iZppNRUZq+abXpVVUEJpmdoqAqu6srM0k2C/lTFRaHDGqrqZoM2tLoHVOeCcqqr7y+//iCGwSq7o4lMUxQ24WGn/DpWObw8kXXpSVVWXAcsAgoKCWkdsKl2AdRKflk9BiZ6Pt0RhZ2nK/rkjOHk+h2/3nuViTiEaRWH36TR+OpSIu70Fu6JT+WzbacCwptfgrs4M7urEI4M7ARirrlvptIzs6crfzw/hgeUHmLUmHHNTEz6Y5E9+iZ7h3V3IyCtm3Ke7+G7/WewtTdkVncqRc1n07+RIWl4RUcm5VcZHfHB3APHpeQzv7sr+M+lEXshu/hesGUQn53Axp4heHjV31zXUMyO6sj82nQ3hhj/jqORcTBSFhXf48t6fp/h6V6xxX32ZasyUfPx3tHH7x39H08OtDV/cFyhV9puJMWPVjF0qFV+C3Gxv3Ir34uppTBTKGjDGauHvEaw5kMCxN0Y3Qavq7oqBlaqqFxRFSVAUpZuqqqe
AEUBE+X8PAIvK//25/JBfgKcVRVmDYdB61rU0vgqQAKsGqqqyZHMUn249bdy2eJI/Oq2GgA52BJSPwSks0TPk/W1VBo7e5t+Oe/t60NnZyjhjpyadna1Z/+RA1h8+R5CnQ5XlJGwtTDHVKHy/Px6ALi7WzBzehVkjuqIxUUjKKqyyjEkXF2tjZW4LMw0FxfqrfyFamaJSPWM+/j+AJvnm7Wit489nb+b9vyIpUw11gI4nZpGeV8z3++MZ7e1KcWkZ206lkF9cio25KaqqElteJwjAtY2O7x7ui2Mz1DISBs09xiojr5j15V317exq/xsXojZpucVEJV9g6lf7mD7QixE9rvy+Vlam8uX/Gb7ktXTWqq4j9WYCq8pnBJ4BHgJMgB8URXkYiAcmle+7EUOphdMYyi081KgtFi1CVVWGLd5OXFo+t/q2JaRnWxyszLj5psvrG5mbatj4zGAOxWdibmqCu50FXk5W9fpFd7bRMePmzpdtt9Zp+W3mYFJzi3Cx0dHFxbrKeWtbG87STEN+yfUXWG09eRF9mcoTQzs3aaZgzmhDTZ9564/x65EkXt1wjMJSPXNGd2N/bHp5YKUnPCGTF/53hOTsIt6a4MPEwPaYahTjzETRPCrPCmxqZ9PyGPHBDmMQdyOv0Siuno+7LTuiUth9Oo3dp9OIfGsM5qYaTl/MITGzkCHlnzsJ6flEJeegL1P58v/OGI8v0auYaVt5YKWqajhQXZW3yxZiKp8NeG2uIixL2tRo3aFE4tLyAXh1bE/jlPyaOFrrmmzcQre2NnSj/tO4Lcw05F8nGavzWQX8a2MkOYUlJKTnY6PT8sIVBuo2Fg8HS7ILS9l47ALzbu1BFxcb47I4uUWlvP/XKZKzi+je1obxfu2wMJMioC2hOTNW3++Pr/I8zVFlXVy//jOtNzmFpfxnRwxf7YplT0wqw7u7MnLJTgBOL7yFA7Hp3PvV/mqPL9GXYaZtuS9yMrdUVOuPY+dZHZrA/cEd+b/oFL7ZexYvJyv+mDX4mq2WbWmqpbi0jFJ9GVqNCWVlKh9uiaKflyODujq1dPPq5d/bTvPrkX+GLg7q4mQcU9PUBnZxoouLNY8O9mJyH8PEk4oZoMcTszh6LovpA714bZwUWGxJJsYxVk0/eP1EUjY+7m14f6I/J5Kym7XSu7j+mJtqMDfVMGdMN34IS2DxX1Es3RZjfPzk+RxjSZ0Kt/i0pa+XA2/8GkFJC6+HI4FVZZKxMlq+O5bQuAx2RqUAcGcvd16+tfs1G1SBoSsQIL9ETxuNCb8eTeLTraf5lNMcmDei1hpMrcmZlFy+3x/Pvf088HO35b2/TjHev/kqXPu427Jl9pAq26zKX9tZa8IBQ5kGCapaVnNlrC5kFXIkIZNbfNvSw60NPdxarn6VuL7otBremuBjfF+p8Mr6o6TnFuPrbksHBws2HrtAkKeDcWJMsQRWojUp0Zfx3p+RhMZlcH//jgR0sENjonCrr9s1P5urokuqoFjP7uhUZq0Jx8pMQ16xntd/PsEnU3q1mmvMKyrl4NmMasewHUvMokyFhwZ40tXVhnv6tny5EqtLaorJ9PWW1xyzAlVVZebqQ5SpKg8P6tRkzyNuXLcHuBsDq83P3czhhExeXncUSzMtSyYH8OVOw9gqDwdL0vOKACht4SJYElhVdp1+wc4pLGH7qRTsLE0Z1MWpxormb/8ewZmUPLIKSuje1oY7A9sbZ/pdDyoyVnlFpbz9+0mcbXSse3wA7286xa9HklgTmsC04I4t3EqDhRtP8v3+eP6YNZgebm2MC5IqisK5jAIA3O1bzwBhy0rjqH56cgD211Ado+uVtgkCq7IylYe/CaWNhSkf39OLY4lZhMZlsOC2nnRr2zLL14jr3/SBXizfHUsnZ2u6utowoLMjFqYaHK11ONvosDDTMLirExuPGQoQSFdga1SPAKulp3VeqnIdoQtZhfx4MIHwhEy2nDSUGVt4hw9T+1UNHs5nFfD
Gryc4ei6LyUEd8G1vy32tJMBoTBUf/q9uOE5iZgGLJ/nj4WjJu3f58uuRJA7GpbeawCoh3TBR4EBsOnlFpUz8Yi9dXazZ9NzNnMsowMGq9sr2zc3ZxjBYec7oblXKY4iWo2nkrsDD8Rl8sSOGbacMwwOeGNqZ8Z/tBuDO3rJEkWg6r47twYtjuhl/p9vbWxof6+xszWf3BgL/1ESUwKoVqU+AlJZbxLz1x/nzxAUsTDUceT2kRWchgGHK85D3tzOpd3uK9WWk5xXzf9GpgKGOVHRyDm/+GsH/ws5hosA4v3aUqSqLN52isKSMRXf6topupabi5WSNosCemDScbXSMKF/CwtJMS0hPVw7FZ7aaQLnijWH36VSW7zbUZom+mMuh+Ezi0/Na3XR2O0szTr45RmYAtiLaRi63sGRzlPH9BGDNAcPKZVP7edDG/OrWpBSiNiYmCuYmV35vMY6xKpWuwFan4oP19MUcVNWwoGeFizmFvPC/o8ZB3QAFJXqSMgvwdLJq9rZWtrP8Te9/B89V2f7gAE9mh9zE6Yu5fL0rlk0nLlCiVzkUnwlAH0975o3teV11+1WnW1sbfnlqEMnZhQzv7lJl5tLw7i5sikjG65WN+LW3ZfmDfXCy1vHen5HEp+fz6OBOtS5C3JhSc4sIjcsAYMvJZMpUw8/w+/3xvPVbBOEJmTw8yKtZ2lIfElS1LsaMVSOMN9kRlVIlqAJYuSeOQA873p7gc9XnF6IxVNSukoxVC1NVla2RF8un2//zQZueV8zIJTtRFDj55hjjbLg/jl1gZ5RhvFJIT1c2HrtAblEp57MKWzyw2nfGMP102bTeuNtb8M7GSEb1dOWBAZ4ABHrYE3ivPdmFJei0Jkz8fC+WZhrWzAhuFVma5uDb3hZfbC/bPtbPjQ3hiZhqTNgTk0bIhzv54bFglm43TPHddCKZY2+EoNM2fvDw3b6znL6Yy4Lx3qiqyrhPdqEvU3nllu4ciE2nk7MJ5OmfAAAgAElEQVQVL9/Sg5TcIn4/ahhD0JyzAMW1qWKtwMbIWD2z+jAAfT0dmDmiC9O+PgDA2xN8b5j3DtH6SVdgK7EnJo2HvwljYBdH5hWUoADnMgp5fbnhjUNVIS4tj+5t27AnJpXXfzlBBwcLds4ZhqrC40M6M/yDHZzPKmixaygq1fPUqsNsOZnMjJs7EeLdFoDvHulX7f4Vaft1TwxAUerXBXq9sjE3Zc2M/gB8sOkUn249bSxGF9zJgX1n0skv0jdJYPXqhuMA3BfsQVZBCReyCxnv344ZN3fisSH/VJ9/fVxPdBoTHhvSWQYKiyv6Z4xV/T9ktkVexMlah297w5eQDg4WZCWW8MLobvT1cuCL+wLRaTXGhdCFaA2k3EIrsTkiGYDdp9PYHp3KMOC3o0kkmRdwZ6A7Px1K5H9h5/B1t+XFdUdxszXnnTv8UBQFRflnsdHEjAI2HjvPks1RuLbR8fiQzigozVJ48pfwJLacTOYWn7a8OLru1bdbekxYa/V8SDdW7Y8nPa8YMIxF23cmncLShlVtLy4tY0dUCpZmGjydrNBpTXCy1hGbmkd2QYlxv4pAzsJUw7/uvDwT4NLGnCWTAxp4VeJGczWzAh9aGQpA3KKx/ByeyPHEbG71NRRgBBjj49Z4DRWikVQEVlJuoYWdSMqid0d7w/iiMMM2e2sdO18chsZEYfOJZL7eFWvc/507fasESxZmGm5yteaDzVHGbacv5rL7tKFbLm7R2CZtv6qqLN8dR/e2NiydGijZp0by9+wh9HprM8O7u2ClM2SpCksa9i3ouR/CjV14FTo4WJCQ/k+W89HBXmwITyIlp4hxfm5Y6274P01xlSrPCjyemMWxxCwmBLhfcSxcam5RlfsVNYQqvmgI0VqZalrHGKsbPmURk5LHTa7WzLj5n+J29hamWOm0mJtq2PniMH6bOYjHhnRiy+whDO3mctk57u/vaby94qE+jPMzfJuzaYYPx6SsQk6ez2Z
ynw4SVDUieyvDLLcv7uuNubYisKp/xuqdjSf5/eh5el5SjbpyUGVrYcrcW3uwY85Q3rrdmwXjva+u8UJQdfD6098f4pWfjnHbZ7vIqpQlrXAkIZOzaXmoqkpUco5xe0alYKpiEW4hWisZY9WCCor1JGbmAwrpecV0drbGtY059paGooZqpQDF3soMeyszfNwvH/BcYWo/D+wtzejr5YCzjY5h3VzQaY8YB5M3lcISPdtPGepT1dY+0TAV3+wrJi4UNCCw+uvEBYDyWYZmrA5N4LUNx9GYKPz81EAOJ2RyXz8PFEXB0kzLtEpBuhBXo6Ir8HhSFnFp+Qzt5sz2Uyn4v7GJXS8NM9YCik3N4/Z/G+pRff9IvyqB1bby95dfnh6IX/vre9awuPb9M8ZKugKblaqq3P2fvRxLzALA3tKUW3zLxws0MOGjKApj/aqOObDSacgvLq3XeUr1ZWyKSObouSxeGtOt1gxUWm4Rd/9nLzEpeQDc5CKDmZtKRWBVl4xVQbGe+PR8urW1oVRfRlJmIY/d3Im2toZ1CKcFd8RGp6WHWxu6tbWRgFg0GU15HavPt8dgosAHk/yZtSacXadTGfr+dl4f783p5JwqZRSiknOIuphrvD/7hyNYmGrwaSe/p6L1M6vIWJVKxqpZHYrPMAZVvu62LBjvbSy2aIxxG6FLzdJMS15x7R/Efx4/T2pusbHK+YxvD7I10vAN8ZHBXjhZ6y47JrfIEKy9/ftJYlLy8HKyondHe2wtpUBfUzE3NfyxFtVhjNWYj3dyNi0fdzsLUnKLKNaX0dnFuso+E3q5N0k7hajMzdacQA87Is5nM8a7LY7WOr57pB+7olO57+v9vFY+G7WyBb9GXLZtUlD7KjXfhGitTKWOVctYuecsNuZadr00HFuLqsFIY751WJlpKC4to0RfVuPCvo9/dwiA+4I7kl1YwrZTF9GYKOjLVHIKS9GaKITGZdDZ2YpOzta89ONR1oYlGI+/t58H/7rDtxFbLapT14xVdmEJZ9MMS9EkZv4zhmpkD1mQWDQ/c1MNPz058LLtg7o64eFgSXz5sklgmCFcXP4tv6+XA3NGd0OnNcFap6VdK6vyL0RNjGOsmnDh8bq4oQKrwhI9f524wJQ+HS4LqgDUitCqMTJW5QPX84v12FrUPkegqFTPD6EJqCo8MMCT5btjScst4r0/I/njuGGMzsgeLsb1/sCQ1h/nL1Oem4MxsLpCuYVHvwmrcn9QFydeubU7DrIgsWhlKr4kPD6kM1/siMFapyW91DBQfc2jwZKhEtckU+kKbH5HEjIpLi1jUFfnah9vzEl1VuUDn/OLS7G1MCUsLp29MWk8NqQzphrFWD8LYOqX+wk7m0EfT3tG9nBh+e5YnvshnIT0AqzMNOQV66sEVT8+3p8gT4fGa6yoVUVXYEFxzX+sJ5Ky2B+bXmXbv6cGVhvAC9HSKj6Agjs58MWOGFzbmPPB3f5kF5RIUCWuWWYyK7D56MtU9GUqO6JS0Jgo9PG0r2FPpdL/r05FxiqvSE9mfjGT/rMXVYWl22O4p28HVuyOM+4bdjYDdzsL/ju9HwkZhvR8xXT8ib3bc09fD576/hB9OhqWk6i8srdoehbVdAVWXqy5RF/G3V/sBeCr+4N45L+GzJUEVaK1+s+03vx0KJFBXZx4eJAX9wV3xKuFl+QS4mrptCZ8dX8QXV2tr7xzE7ohAqstJ5N57NuDAAzo7Iid5RW6ZhohsqrIWL344xEuZBWiqvDMiK58uzfOGFS52OhIyS1CVWHWiK5YmGmqrBL/QshNPDTQCyudlq3PD736RokGqegKrJjlGRaXzn1f7+f7R4MJ9LBn8aZT5BXreW7kTYzs6crbE3wI7uTYkk0WolY+7rbGGamvjevZwq0RonGYmCiM7NnyY1pviMCqi4s1L4TcRJkKo8vX0auO2oh9ge3tLVEUOBSfSS8PO+aM6cYdvdrjZG3Gf3acYcnd/vTr5EhuUSlf/18
st5Uvqls5y/H08K6N1h7RcDqtCYoCizdFMairMw+tDKWwpIz/hZ1j35k0/rPjDIO6ODFzeBcA4yxPIYQQN54bIrDq7GxdzyDl6gOsbm1tOPJ6CBammiqzAu/v71mlUru1Tsuskf+0rWI8z00tnMoU/1AUhQ/vDuDln44yobyQIsDqA/HG2+P928nYFCGEEDdGYFV3jfvBWLlbr84tUBT+mDVYpji3MhN6uVNUqueldceqbJ/StwOrDyTg30GqUgshhJDAqqqKuKqF19zrccm6cqJ1uNXXjRW743CzNefxIZ0x05rQy8OeWSNuMlZWF0IIcWOTwEqIOrIxN+XPZ2++bLsEVUIIISrUXrnyRtXCGSshhBBCXJsksBJCCCGEaCQSWFXWSsZYCSGEEOLaJIFVJUojzwoUQgghxI1FAqvqSMZKCCGEEA0ggZUQQgghRCORwKoakq8SQgghRENIYFWJctkNIYQQQoi6k8BKCCGEEKKRSGBVLUlZCSGEEKL+JLCqTOIpIYQQQlwFCayqI+UWhBBCCNEAElhVQ+IqIYQQQjSEBFaVVMRTqkRWQgghhGgACayEEEIIIRqJBFbVkoyVEEIIIepPAqtKKsIp6QkUQgghRENIYFUtiayEEEIIUX8SWAkhhBBCNBIJrKojCSshhBBCNIAEVpUZB1dJZCWEEEKI+pPAqhIJp4QQQghxNeoUWCmKEqcoyjFFUcIVRQkr3+agKMpmRVGiy/+1L9+uKIryiaIopxVFOaooSmBTXkBTUGRaoBBCCCEaoD4Zq2GqqgaoqhpUfv9l4G9VVbsCf5ffB7gF6Fr+3wzg88ZqrBBCCCFEa3Y1XYG3A9+U3/4GmFBp+39Vg32AnaIoblfxPM1PElZCCCGEaIC6BlYqsElRlIOKoswo3+aqqup5gPJ/Xcq3uwMJlY49V77tGiKRlRBCCCHqT1vH/QaqqpqkKIoLsFlRlMha9q0uKlEv28kQoM0A8PDwqGMzmphMChRCCCHEVahTxkpV1aTyfy8C64G+QHJFF1/5vxfLdz8HdKh0eHsgqZpzLlNVNUhV1SBnZ+eGX4EQQgghRCtxxcBKURQrRVFsKm4DIcBx4BfggfLdHgB+Lr/9C3B/+ezAYCCrosuwtVOquSWEEEIIUVd16Qp0BdaXlyDQAt+rqvqnoiihwA+KojwMxAOTyvffCNwKnAbygYcavdVNTeIqIYQQQjTAFQMrVVXPAP7VbE8DRlSzXQWeapTWtRiJrIQQQghRf1J5vRIJp4QQQghxNSSwqoZiIiGWEEIIIepPAishhBBCiEYigZUQQgghRCORwKoyY4FQ6QoUQgghRP1JYFWJIsPXhRBCCHEVJLCqhiIZKyGEEEI0gARWQgghhBCNRAKraknGSgghhBD1J4FVdSSuEkIIIUQDSGBVmQRUQgghhLgKElhVSyIsIYQQQtSfBFaVGMMpWdJGCCGEEA0ggZUQQgghRCORwKoakq8SQgghRENIYFWJBFRCCCGEuBoSWFVHIiwhhBBCNIAEVlUo5f+XyEoIIYQQ9adt6QYIIYS4MZWUlHDu3DkKCwtbuilCVMvc3Jz27dtjampa52MksKqOLMIshBBN7ty5c9jY2ODp6Yki77uilVFVlbS0NM6dO4eXl1edj5OuwMrk71oIIZpNYWEhjo6OElSJVklRFBwdHeudUZXAqjryRy6EEM1CgirRmjXk91MCq0qUy24IIYQQQtSdBFbVkshKCCGEEPUngVU1JKwSQojrX2ZmJkuXLgVg+/btjBs3rkXbM3ToUMLCwlq0DXW1cuVKkpKSjPcfeeQRIiIiWqw9//rXv67q+AULFrB48eJGaYsEVtWRPn8hhLjuVQ6srlZpaWmjnKc10ev1NT52aWD11Vdf0bNnz+ZoVrWuNrBqTFJuQQghRIt749cTRCRlN+o5e7Zrw+u3edf4+Msvv0xMTAwBAQGYmppiZWXFxIkTOX78OL179+a7775DURQOHjzI7Nmzyc3
NxcnJiZUrV+Lm5sbQoUMZMGAAu3fvZvz48Rw7dgwLCwsiIyM5e/YsK1as4JtvvmHv3r3069ePlStXAvDEE08QGhpKQUEBEydO5I033qjT9Xz99de8++67tGvXjq5du6LT6fjss89ISUnh8ccfJz4+HoCPPvqIgQMHsmDBAuLj4zlz5gzx8fE8++yzPPPMMwB89913fPLJJxQXF9OvXz+WLl2KRqPB2tqa2bNn89dff/HBBx+wdetWfv31VwoKChgwYAD/+c9/WLduHWFhYUydOhULCwv27t3LLbfcwuLFiwkKCmL16tX861//QlVVxo4dy7vvvguAtbU1s2bN4rfffsPCwoKff/4ZV1fXaq/1119/5e2336a4uBhHR0dWrVqFq6srubm5zJw5k7CwMBRF4fXXXze+lgEBAXh7e7Nw4ULGjRvH8ePHAVi8eDG5ubksWLCAL7/8kmXLllFcXEyXLl349ttvsbS0rNPrX1eSsaqOZKyEEOK6t2jRIjp37kx4eDjvv/8+hw8f5qOPPiIiIoIzZ86we/duSkpKmDlzJj/++CMHDx5k+vTpzJs3z3iOzMxMduzYwfPPPw9ARkYGW7du5cMPP+S2227jueee48SJExw7dozw8HAAFi5cSFhYGEePHmXHjh0cPXr0im1NSkrirbfeYt++fWzevJnIyEjjY7NmzeK5554jNDSUdevW8cgjjxgfi4yM5K+//uLAgQO88cYblJSUcPLkSdauXcvu3bsJDw9Ho9GwatUqAPLy8vDx8WH//v0MGjSIp59+mtDQUI4fP05BQQG//fYbEydOJCgoiFWrVhEeHo6FhUWVdr700kts3bqV8PBwQkND2bBhg/HcwcHBHDlyhJtvvpkvv/yyxusdNGgQ+/bt4/Dhw9xzzz289957ALz11lvY2tpy7Ngxjh49yvDhw1m0aBEWFhaEh4cbr6Mmd955J6GhoRw5coQePXrw9ddfX/G1ry/JWAkhhGhxtWWWmkvfvn1p3749AAEBAcTFxWFnZ8fx48cZNWoUYOgec3NzMx4zefLkKue47bbbUBQFX19fXF1d8fX1BcDb25u4uDgCAgL44YcfWLZsGaWlpZw/f56IiAj8/PxqbduBAwcYMmQIDg4OAEyaNImoqCgAtmzZUmV8U3Z2Njk5OQCMHTsWnU6HTqfDxcWF5ORk/v77bw4ePEifPn0AKCgowMXFBQCNRsNdd91lPNe2bdt47733yM/PJz09HW9vb2677bYa2xkaGsrQoUNxdnYGYOrUqezcuZMJEyZgZmZmHMfWu3dvNm/eXON5zp07x+TJkzl//jzFxcXGAp1btmxhzZo1xv3s7e1rfd0udfz4cV599VUyMzPJzc1l9OjR9Tq+LiSwqoYkrIQQ4saj0+mMtzUaDaWlpaiqire3N3v37q32GCsrq2rPYWJiUuV8JiYmlJaWEhsby+LFiwkNDcXe3p4HH3ywTgUoVVWt8bGysjL27t1bJXN0pWt64IEHeOeddy7b39zcHI1GAxgKuD755JOEhYXRoUMHFixYcMW21tZOU1NTY12oirbUZObMmcyePZvx48ezfft2FixYYDz/lWpLabVaysrKjPcrt/nBBx9kw4YN+Pv7s3LlSrZv317ruRpCugKrJZGVEEJc72xsbIyZnZp069aNlJQUY2BVUlLCiRMnGvyc2dnZWFlZYWtrS3JyMn/88Uedjuvbty87duwgIyOD0tJS1q1bZ3wsJCSEzz77zHi/osuxJiNGjODHH3/k4sWLAKSnp3P27NnL9qsISJycnMjNzeXHH380PlbTa9evXz927NhBamoqer2e1atXM2TIkDpdY2VZWVm4u7sD8M033xi3X3qtGRkZgCFoKykpAcDV1ZWLFy+SlpZGUVERv/32m3H/nJwc3NzcKCkpuWK3YUNJYCWEEOKG5OjoyMCBA/Hx8WHOnDnV7mNmZsaPP/7ISy+9hL+/PwEBAezZs6fBz+nv70+vXr3w9vZm+vT
pDBw4sE7Hubu7M3fuXPr168fIkSPp2bMntra2AHzyySeEhYXh5+dHz549+eKLL2o9V8+ePXn77bcJCQnBz8+PUaNGcf78+cv2s7Oz49FHH8XX15cJEyYYuw7BkPl5/PHHCQgIoKCgwLjdzc2Nd955h2HDhuHv709gYCC33357na6xsgULFjBp0iQGDx6Mk5OTcfurr75KRkYGPj4++Pv7s23bNgBmzJiBn58fU6dOxdTUlPnz59OvXz/GjRtH9+7djce/9dZb9OvXj1GjRlXZ3piU2tJ2zSUoKEhtDbU7fp7yODcd3kHE1Ke567WnWro5QghxXTt58iQ9evRo6WZcM3Jzc7G2tqa0tJQ77riD6dOnc8cdd7R0s6571f2eKopyUFXVoOr2l4yVEEIIcQ1YsGABAQEB+Pj44OXlxYQJE1q6SaIaMni9GoqJxJtCCCFaRr9+/SgqKqqy7dtvv220yuCtycKFC/nf//5XZdukSZOqlLS41khgJYQQQrQi+/fvb+kmNJt58+Zd00FUdSQ1Uy2ZFSiEEEKI+pPAqjoSVwkhhBCiASSwEkIIIYRoJBJYVUMSVkIIIepj6NChtIayQc0hPDycjRs3Gu//8ssvLFq0qMXas2HDhipL+tRXXFwcPj4+jdYeCayEEEKIVqK2ZV6aU23tuDSwGj9+PC+//HJzNKtaVxtYNTYJrIQQQtyw4uLi6NGjB48++ije3t6EhIRQUFBQJQOVmpqKp6cnYFiE+YUXXsDX1xc/Pz8+/fTTy865adMm+vfvT2BgIJMmTSI3NxeAN998kz59+uDj48OMGTOM6+oNHTqUuXPnMmTIED7++ONq2xkTE0NwcDB9+vRh/vz5WFtbGx97//336dOnD35+frz++uu1XlfFucaMGUPv3r0ZPHgwkZGRgKGa+uzZsxk2bBgvvfQSBw4cYMCAAfTq1YsBAwZw6tQpiouLmT9/PmvXriUgIIC1a9eycuVKnn76aQDOnj3LiBEj8PPzY8SIEcTHxxvP/cwzzzBgwAA6depUZXmcS+Xm5jJixAgCAwPx9fXl559/Nj723//+Fz8/P/z9/Zk2bRp79uzhl19+Yc6cOQQEBBATE1Pjzy4uLo7BgwcTGBhIYGDgVVXQr42UW6iGdAUKIUQz++NluHCscc/Z1hduuXIXVXR0NKtXr+bLL7/k7rvvrrIO36WWLVtGbGwshw8fRqvVkp6eXuXx1NRU3n77bbZs2YKVlRXvvvsuS5YsYf78+Tz99NPMnz8fgGnTpvHbb79x2223AZCZmcmOHTtqfN5Zs2Yxa9YspkyZUmXJmk2bNhEdHc2BAwdQVZXx48ezc+dOPDw8qr2u++67jxkzZvDFF1/QtWtX9u/fz5NPPsnWrVsBiIqKYsuWLWg0GrKzs9m5cydarZYtW7Ywd+5c1q1bx5tvvklYWJhxzb6VK1ca2/P0009z//3388ADD7B8+XKeeeYZNmzYAMD58+fZtWsXkZGRjB8/nokTJ1Z7rebm5qxfv542bdqQmppKcHAw48ePJyIigoULF7J7926cnJxIT0/HwcGB8ePHM27cuBrPV8HFxYXNmzdjbm5OdHQ0U6ZMaZLuWwmsqqHQ8sv8CCGEaB5eXl4EBAQA0Lt3b+Li4mrcd8uWLTz++ONotYaPTwcHhyqP79u3j4iICOMagMXFxfTv3x+Abdu28d5775Gfn096ejre3t7GwGry5Mm1tnHv3r3GAOXee+/lhRdeAAyB1aZNm+jVqxdgyPZER0fj4eFR7XXl5uayZ88eJk2aZDx35WKkkyZNQqPRAIaFkB944AGio6NRFMW4yPGV2vnTTz8BhuDxxRdfND42YcIETExM6NmzJ8nJyTWeQ1VV5s6dy86dOzExMSExMZHk5GS2bt3KxIkTjWsHXvraX0lJSQlPP/004eHhaDQaoqKi6nV8XUlgJYQQouXVIbPUVHQ6nfG2RqOhoKAArVZLWVkZAIWFhcbHVVVFUWru11BVlVG
jRrF69eoq2wsLC3nyyScJCwujQ4cOLFiwoMp5raysGtR2VVV55ZVXeOyxx6psj4uLq/a6ysrKsLOzIzw8vNrzVW7Ha6+9xrBhw1i/fj1xcXEMHTq03u2r/FpVbk9t6xSvWrWKlJQUDh48iKmpKZ6enhQWFl7xta9Q08/uww8/xNXVlSNHjlBWVoa5uXm9r6cuZIxVNRTpDBRCiBuap6cnBw8eBKgyHigkJIQvvvjCOLj70q7A4OBgdu/ezenTpwHIz88nKirK+AHv5OREbm5urWOMqhMcHGzsolyzZo1x++jRo1m+fLlxHFdiYiIXL16s8Txt2rTBy8vLuIyMqqocOXKk2n2zsrJwd3cHqnb32djYkJOTU+0xAwYMMLZv1apVDBo0qI5XWPV5XVxcMDU1Zdu2bZw9exaAESNG8MMPP5CWlgb889pf2p6afnZZWVm4ublhYmLCt99+i16vr3fb6kICKyGEEOISL7zwAp9//jkDBgwgNTXVuP2RRx7Bw8PDOID6+++/r3Kcs7MzK1euZMqUKfj5+REcHExkZCR2dnY8+uij+Pr6MmHCBPr06VOv9nz00UcsWbKEvn37cv78eWxtbQFDoHfvvffSv39/fH19mThxYo1BT4VVq1bx9ddf4+/vj7e3d5XB4ZW9+OKLvPLKKwwcOLBKEDJs2DAiIiKMg9cr++STT1ixYgV+fn58++23NQ7Gr83UqVMJCwsjKCiIVatW0b17dwC8vb2ZN28eQ4YMwd/fn9mzZwNwzz338P7779OrVy9iYmJq/Nk9+eSTfPPNNwQHBxMVFdXgLOGVKLWl46rsqCgaIAxIVFV1nKIoXsAawAE4BExTVbVYURQd8F+gN5AGTFZVNa62cwcFBamtof7Hz1Me56bDO4h+8FnGv/zYlQ8QQgjRYCdPnqRHjx4t3YxrQn5+PhYWFiiKwpo1a1i9enWNAZFoXNX9niqKclBV1aDq9q9PxmoWcLLS/XeBD1VV7QpkAA+Xb38YyFBVtQvwYfl+QgghhGiggwcPEhAQgJ+fH0uXLuWDDz5o6SaJGtRp8LqiKO2BscBCYLZiGD02HLi3fJdvgAXA58Dt5bcBfgQ+UxRFUeuaGhNCCCFuUAsXLjSOf6owadIk5s2bV+NYqGvVsWPHmDZtWpVtOp2O/fv3t1CLGkddZwV+BLwI2JTfdwQyVVWtKM16DnAvv+0OJACoqlqqKEpW+f7/dHS2cjJ0XQghREuYN28e8+bNa+lmNAtfX98aZydey67YFagoyjjgoqqqBytvrmZXtQ6PVT7vDEVRwhRFCUtJSalTY4UQQgghWrO6jLEaCIxXFCUOw2D14RgyWHaKolRkvNoDSeW3zwEdAMoftwWqzkcFVFVdpqpqkKqqQc7Ozld1EUIIIYQQrcEVAytVVV9RVbW9qqqewD3AVlVVpwLbgIr68Q8AFdMTfim/T/njW6+18VV1qD8mhBBCCHGZq6lj9RKGgeynMYyh+rp8+9eAY/n22UDLLXkthBBCCNGM6hVYqaq6XVXVceW3z6iq2ldV1S6qqk5SVbWofHth+f0u5Y+faYqGCyGEEFcrLi4OHx+fBh27fft2xo0b18gtalmZmZksXbrUeD8pKemKixs3pfDwcDZu3HhV5/D09KxSKLSpSeX1aijXVMelEEKI1qRiuZvWqrb2XRpYtWvXrt7L7zSmxgismpsEVtWRMVZCCHHD0Ov1PProo3h7exMSEsKJEycIDAw0Ph4dHU3v3r0B+PPPP+nevTuDBg3ip59+Mu6zYMECZsyYQUhICPfffz+FhYU89NBD+Pr60qtXL7Zt21bj8+fn53P33Xfj5+fH5MmT6devHxWrkWzatIn+/fsTGBjIpEmTjGsCenp68vrrrxMYGIivry+RkZEA5OXlMX36dPr06UOvXr2M1dlXrlzJpEmTuO222wEQQVIAACAASURBVAgJCSE3N5cRI0YYj6/Y7+WXXyYmJoaAgADmzJlTJaNX0zWtXLm
SO++8kzFjxtC1a1defPHFWl/vJ554gqCgILy9vXn99deN20NDQxkwYAD+/v707duXrKws5s+fz9q1a43L5yxYsIDFixcbj/Hx8SEuLg6ACRMm0Lt3b7y9vVm2bFmtbWhKda1jdUORuEoIIZrXuwfeJTI9slHP2d2hOy/1femK+0VHR7N69Wq+/PJL7r77bg4fPoytrS3h4eEEBASwYsUKHnzwQQoLC3n00UfZunUrXbp0YfLkyVXOc/DgQXbt2oWFhYWxMvqxY8eIjIwkJCSEqKgozM3NL3v+pUuXYm9vz9GjRzl+/DgBAQEApKam8vbbb7NlyxasrKx49913WbJkCfPnzwcMCzofOnSIpUuXsnjxYr766isWLlzI8OHDWb58OZmZmfTt25eRI0cCsHfvXo4ePYqDgwOlpaWsX7+eNm3akJqaSnBwMOPHj2fRokUcP37cWF+qImgB+Pe//13tNYEhs3T48GF0Oh3dunVj5syZdOjQodrXe+HChTg4OKDX6xkxYgRHjx6le/fuTJ48mbVr19KnTx+ys7OxtLTkzTffJCwsjM8++wwwBLA1Wb58OQ4ODhQUFNCnTx/uuusuHB0da/3ZNwXJWAkhhLiheXl5GYOZ3r17ExcXxyOPPMKKFSvQ6/WsXbuWe++9l8jISLy8vOjatSuKonDfffdVOc/48eOxsLAAYNeuXcaq4t27d6djx47GIORSu3bt4p577gEMGRg/Pz8A9u3bR0REBAMHDiQgIIBvvvmGs2fPGo+78847q7QZDBmuRYsWERAQwNChQyksLCQ+Ph6AUaNG4eDgAICqqsydOxc/Pz9GjhxJYmIiycnJtb5OtV3TiBEjsLW1xdzcnJ49e1Zp56V++OEHAgMD6dWrFydOnCAiIoJTp07h5uZmXJy6TZs2aLX1y/188skn+Pv7ExwcTEJCAtHR0fU6vrFIxqo6Um9BCCGaVV0yS01Fp9MZb2s0GgoKCrjrrrt44403GD58OL1798bR0ZGEhASUWj4frKysjLfrU2Wopn1VVWXUqFGsXr261nZrNBrjuClVVVm3bh3dunWrsu/+/furtG/VqlWkpKRw8OBBTE1N8fT0pLCwsEHtrNyWS9tzqdjYWBYvXkxoaCj29vbGTKCqqrW+thW0Wi1lZWXG+xVt3r59O1u2bGHv3r1YWloag8qWIBmr6khgJYQQNzRzc3NGjx7NE088wUMPPQQYsjSxsbHExMQA1BjwANx8882sWrUKgKioKOLj4y8LdioMGjSIH374AYCIiAiOHTsGQHBwMLt37+b06dOAYSxWTVmvCqNHj+bTTz81BkGHDx+udr+srCxcXFwwNTVl27ZtxgyTjY0NOTk5V31NNcnOzsbKygpbW1uSk5P5448/AMNrm5SURGhoKAA5OTmUlpZe1h5PT08OHToEwKFDh4iNjTVej729PZaWlkRGRrJv3756tasxSWAlhBBCVGPq1KkoikJISAhgCLaWLVvG2LFjGTRoEB07dqzx2CeffBK9Xo+vry+TJ09m5cqVVbI6l+6bkpKCn58f7777Ln5+ftja2uLs7MzKlSuZMmUKfn5+BAcHGwep1+S1116jpKQEPz8/fHx8eO2112q8trCwMIKCgli1ahXdu3cHwNHRkYEDB+Lj48OcOXMafE018ff3p1evXnh7ezN9+nQGDhwIgJmZGWvXrmXmzJn4+/szatQoCgsLGTZsGBEREcbB6//f3p2HSVGeex//3rMxG9vIKouAgOKCKCOixhUNmqi4a47RaExINJoYNYa4vMZoNJ5jNDEqiUaDeoyJS1SOMe6K0bgiKCqy6QjDINuwDTDM9rx/VPXYDA0z1VO9/z7XNdd0V9XTfd/dXdV3Vz311CmnnEJtbS1jxoxh6tSpjBw5EoBjjjmGpqYmRo8ezTXXXMP48eMDxRUmS4dB0SsrK13kDIhUeupbP2TkrBlUnX8px/7s+6kOR0Qkq82dO5dRo0alOoztuuWWW1i3bh3
XX399Qp+nubmZxsZGiouLWbRoERMmTGD+/PkUFRUl9HmlY2J9Ts1spnOuMtby6mMlIiLSxkknncSiRYt4+eWXE/5cmzZt4ogjjqCxsRHnHFOnTlVRlcFUWImIiLTxxBNPhP6Yzz33HD//+dad9IcOHcoTTzxBOhy1CdsBBxzAli1btpr24IMPsvfee6coouRQYRWDuq6LiEjYJk6cyMSJE1MdRtK8/fbbqQ4hJdR5XURERCQkKqxi0GgLIiIiEg8VVrGk/kRJERERyUAqrGLQDisRERGJhwqrWFRZiYjkrCFDhrBq1SoAysvLQ3vcJ598kk8++STu9lVVVey1116hxSOJocJKREQkCTpbWElm0HALMXTkQpAiIhKeL2+8kS1zd3y5lqC6jNqdfldeucNlTjzxRJYsWUJ9fT0/+clPmDx5cocfv66ujkmTJrFmzRoaGxu54YYbmDRpEgAPPPAAt9xyC2bG6NGjueCCC5g+fTozZszghhtu4PHHH+f888/nlltuobKyklWrVlFZWUlVVRVVVVWcffbZbNy4EYA77riDgw46KP4XQpJKhZWIiOSs++67j4qKCjZv3sz+++/PKaec0uG2xcXFPPHEE3Tr1o1Vq1Yxfvx4TjjhBD755BN+/etf88Ybb9CrVy9qa2upqKjghBNO4LjjjuPUU0/d4eP26dOHF154geLiYhYsWMC3vvWtrBxANFupsBIRkZRrb89Sotx+++2to6wvWbKEBQsWdLitc44rr7yS1157jby8PJYuXcry5ct5+eWXOfXUU+nVqxcAFRUVgWJqbGzkoosuYvbs2eTn5zN//vxA7SW1VFiJiEhOevXVV3nxxRd58803KS0t5fDDD6e+vr7D7R966CFWrlzJzJkzKSwsZMiQIdTX1+Oc61CXkoKCAlpaWgC2et7bbruNvn378sEHH9DS0kJxcXHw5CRl1HldRERy0rp16+jZsyelpaV8+umnvPXWW4Hb9+nTh8LCQl555RW++OILACZMmMAjjzzC6tWrAaitrQWga9eubNiwobX9kCFDmDlzJgCPPfbYVo/bv39/8vLyePDBB2lubu5UnpJcKqxERCQnHXPMMTQ1NTF69GiuueYaxo8fH6j9WWedxXvvvUdlZSUPPfQQu+++OwB77rknV111FYcddhj77LMPl156KQBnnnkm//M//8O+++7LokWLuPzyy5k6dSoHHXRQ6/AOABdeeCH3338/48ePZ/78+ZSVlYWXtCScOZf6YcYrKytdOnTMe+pbP2TkrBlUf/8yjr7se6kOR0Qkq82dO5dRo0alOgyRHYr1OTWzmc65yljLa4+ViIiISEjUeV1ERGQH5syZw9lnn73VtC5duvD222+nKCJJZyqsREREdmDvvfdm9uzZqQ5DMoQOBYqISMqkQz9fke2J5/OpwiomregiIolWXFzM6tWrVVxJWnLOsXr16sDjiOlQoIiIpMTAgQOprq5m5cqVqQ5FJKbi4mIGDhwYqI0Kq5h0EWYRkUQrLCxk6NChqQ5DJFQ6FCgiIiISEhVWIiIiIiFRYSUiIiISEhVWIiIiIiFRYSUiIiISEhVWIiIiIiFRYSUiIiISEhVWIiIiIiFRYSUiIiISEhVWIiIiIiFRYRWTLggqIiIiwamwEhEREQmJCquYdBFmERERCU6FlYiIiEhIVFiJiIiIhESFlYiIiEhI2i2szKzYzN4xsw/M7GMzu86fPtTM3jazBWb2dzMr8qd38e8v9OcPSWwKIiIiIumhI3ustgBHOuf2AcYAx5jZeOBm4Dbn3AhgDXC+v/z5wBrn3HDgNn85ERERkazXbmHlPHX+3UL/zwFHAo/50+8HTvRvT/Lv48+fYGY6zU5ERESyXof6WJlZvpnNBlYALwCLgLXOuSZ/kWpggH97ALAEwJ+/DtgpxmNONrP3zOy9lStXdi4LERERkTTQocLKOdfsnBsDDATGAaNiLeb/j7V3apuhzJ1zdzvnKp1zlb179+5
ovCIiIiJpK9BZgc65tcCrwHigh5kV+LMGAjX+7WpgEIA/vztQG0awIiIiIumsI2cF9jazHv7tEuAoYC7wCnCqv9h3gKf829P9+/jzX3bO6eJ7IiIikvUK2l+E/sD9ZpaPV4g94px72sw+Af5mZjcAs4B7/eXvBR40s4V4e6rOTEDciaU6UEREROLQbmHlnPsQ2DfG9M/w+lu1nV4PnBZKdKmicxhFREQkDhp5PSZVViIiIhKcCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCisRERGRkKiwEhEREQmJCqson+5/FAB1e+2b4khEREQkE6mwirJy0AiOPfEWGnfqk+pQREREJAOpsBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiQqrKKdXDgLgwGE7pTgSERERyUQFqQ4gnYwbWkHVb76Z6jBEREQkQ2mPlYiIiEhIVFiJiIiIhESFlYiIiEhI2i2szGyQmb1iZnPN7GMz+4k/vcLMXjCzBf7/nv50M7PbzWyhmX1oZvslOgkRERGRdNCRPVZNwGXOuVHAeOBHZrYHMAV4yTk3AnjJvw9wLDDC/5sMTA09ahEREZE01G5h5Zxb5px737+9AZgLDAAmAff7i90PnOjfngQ84DxvAT3MrH/okYuIiIikmUB9rMxsCLAv8DbQ1zm3DLziC+jjLzYAWBLVrNqf1vaxJpvZe2b23sqVK4NHLiIiIpJmOlxYmVk58DhwiXNu/Y4WjTHNbTPBubudc5XOucrevXt3NAwRERGRtNWhwsrMCvGKqoecc//wJy+PHOLz/6/wp1cDg6KaDwRqwglXREREJH115KxAA+4F5jrnbo2aNR34jn/7O8BTUdPP8c8OHA+sixwyFBEREclmHbmkzcHA2cAcM5vtT7sS+A3wiJmdDywGTvPnPQN8A1gIbALOCzViERERkTTVbmHlnHud2P2mACbEWN4BP+pkXCIiIiIZRyOvi4iIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiIiIREhZWIiIhISFRYiYiISMZzzvH60tepqatJaRwqrERERCTjbW7azAUvXsCzVc+mNI6ClD67iIiknUVrF3Hdm9fR0NwQuG1BXgFXHnAle+y0RwIiE9m+TU2bACgrKEtpHCqsRERkKzOXz2TWilkc0P8AivKKOtyuxbXwRs0bvPvluyqsJOk2NXqFVWlhaUrjUGElIiJb2dCwAYDbj7g90JdUi2thzANjqGusS1RoItsV2WNVWqDCKuEamxtbX/D2lBaWtla9ACUFJRTld/wXm4hIpqtrrCPf8ikpKAnULs/yKCsso65BhZUkn/ZYJdFrS1/jklcuiatt39K+vHDqC5hZyFFJLqitr+XHL/+YjY0bA7ftVdKLOybcQZf
8LgmITGT7NjRsoGtR17i2e2WFZXF93kU6K/K5U2GVBCN7jGTKuCntLvfAxw9Qs7GGURWjmDR8Ev+p+Q+vVb9Gk2ui0AqTEKlkm3m18/hg5Qfs329/enTp0eF2a+rX8Nayt5i7ei5j+oxJYIQi26prrKO8sDyutuWF5ToUKCmhQ4FJNKjbIM7qdla7yz37+bPUbKxhSPchnDXqLBqaG3it+jUamxspzFNhJcFF+qpMGTeFkT1Hdrjdyk0rOfLRI5mzao4KK0m6uoY6uhZ1jattWZH2WElqRA4FlhWm9qxAjWMVJc/ytvofKaYaWxpTFpNktkhh1a2oW6B2vUt707e0L3NWzUlEWCI7tKFhA+VF2mMlmUV7rNJQpKDKt3xAhZV03vqG9QBx/frfu9fezFoxixlLZgRuO6z7MAZ1GxS4nWSPhuYGrnr9KtbUrwnc9tPaTxnff3xcz1tWWEZNXQ2rNq8K3LaiuKJ1Oyy565PVn/Dhyg8Dt3tr2VtA6vdYtVtYmdl9wHHACufcXv60CuDvwBCgCjjdObfGvJ6Ovwe+AWwCznXOvZ+Y0MMXWaENr8NmYb5XWDW1NKUsJslsGxo2kG/5cf2CGtt3LC8ufpGLXr4ocNsRPUfwjxP+EbidZI+q9VU8W/Usw7oPC9S/D2D3it05ZugxcT1v9y7dqVpfxRGPHBG47ckjTua6g66L63kle1z7n2v5tPbTuNr2K+vX+t2dKh3ZYzUNuAN4IGraFOAl59xvzGyKf//nwLHACP/vAGCq/z8jtO6xymuzx6pZe6wkPusb1sd9dtWZu5/J2L5jaXEtgdr98YM/Mm/NvMDPJ9llbf1aAK4efzX799s/ac/7g9E/YFTFKJxzgdpN+3gaS+uWJigqySSrNq/iuGHHcXnl5YHbxnsIO0ztFlbOudfMbEibyZOAw/3b9wOv4hVWk4AHnLdGvWVmPcysv3NuWVgBJ9I2e6x0KFB8s1bMYsmGJYHbzaudF3cn4IK8AkbtNCpwu16lvfho9UdxPadkj7VbvMKqe5fuSX3efmX9OH230wO3e/6L5/UjVnDOsbZ+Lf3K+rFTyU6pDicu8fax6hsplpxzy8ysjz99ABD97VPtT9umsDKzycBkgMGDB8cZRrgiexXUeV2iNbc0c/5z58f9OTiw/4EhR7RjhXmF+sxKa2EV9DBgqhTmF241OLPkpvUN62lyTfTs0jPVocQt7M7rsY53xNwf7Jy7G7gboLKyMtg+4wSJdFpvLazyVViJN6ZPY0sjk0dP5sThJwZu36e0T/sLhagor0i//CXjCquivCIaWoJf9FmyS+Rki57FuVdYLY8c4jOz/sAKf3o1EH0q0kCgpjMBJlMeWw+3UJDnvTwqrHLb+i3emX2Dug5iUNf0P9OuMF97rLLJmvo1PLnwycAn0byx9A1KC0oz5pJcRflFNDSrsMoW67as49H5j7Juy7pA7SJnk+ZiYTUd+A7wG///U1HTLzKzv+F1Wl+XKf2rYAfDLejXf06LDJkQdCyqVIkcCnTO6VJMWeDpz57m1pm3xtV2bN+xIUeTOEV5RfpBkAVaXAvTF03ntpm3UVtfG/h6k+Bdzmt4j+EJiC45OjLcwsN4HdV7mVk1cC1eQfWImZ0PLAZO8xd/Bm+ohYV4wy2cl4CYE6a187pt3Xldu6dz27oG7xdXsjsBxyvyuW1qaUr5acfSeas3r6bACnj7rLdbT6zpqMgZzplAe6wy37zaefz67V8za8Us9um9D386+k/sXrF7qsNKuo6cFfit7cyaEGNZB/yos0GlSmvnddr0sdIeq5yWiXuswDuErcIq863dspYexT0y5pBevArzCvUjNkNtaNjAnbPv5OFPH6Z7UXd+ddCvmDR8Us4O9qqR16O0dl7P01mB2cg5x/w189nctDlQu7mr5wIZVFjppIussqZ+TUb3N+ko7bHKPM45nv7saX773m+pra/l9N1O5+J9L86YvfuJosIqyjZ7rFRYZZX3V7z
Puc+eG1fbwrzCjNlY6HObXdZsWZPRp553VFG+zmbNJAvWLODXb/+amctnsnevvblzwp3s2WvPVIeVFlRYRdlmuAV9QWWVmjrvBNXrD76e3iW9A7XtU9qH4oLiRIQVOp10kX5aXAuPzX+s9aLcQSxevzijOqHHqyiviCbXRItrydlDSJmgrqGOqR9M5aG5D1FeVM61B17LySNO1nsWRYVVlMgHQ4VVdoqMj3LEoCMyZu9TPDRMSPqZv2Y+1791fdzt4xmBP9NE+pA1NDdkzI+YXOKc41+f/4tb3ruFVZtXcfKIk7lkv0voUZwZ46QlkwqrKJEzblqHW1Dn9ayydsta8i0/Y/pKxUt9rNJPZE/VXRPuiuu6fblQaLQWVi0NFJP9+WaSRWsXcePbN/LOl++wx0578Lsjfsfo3qNTHVbaUmEVQ9vhFvQFlR1q62vp0aVH1o/tpM9t+qlvqge8ITtyoUiKR1HeV3usJD1satzEHz/8Iw9+/CAlhSVcfcDVnDry1IwaxiMVVFjFEDkUGPkF9YdZf+DuD+8O/DiThk/iiv2vCDU28TpNrty0MnC7z9d9nhNnV6mPVfqJnIkaz2CJuSKyvdXnNvWcczz/xfP897v/zYpNKzhp+ElcMvYSKoorUh1aRlBhFUOksOqS34Up46aweP3iwI8xo3oG7y9/P+zQcl59Uz2nP3164Mt7RBw68NCQI0o/2mOVflRYtS9yCFtjWaXW5+s+56a3b+LNZW+yW8/d+O1hv2VMnzGpDiujqLCKIfrshrNGnRXXY6zcvJJFaxeFFZL46hrraGpp4rw9z+PIwUcGbj+0+9AERJVeWn/5q7BKG5HCSocBty9yKPCCFy+gS36XQG3zLI8p46bE1X9NPJsaN3HPnHuY9vE0ivOLmTJuCmfsdkbryTDScXrFYoh0Xu+MkoKSwANRSvs2NW4CYETPEfoVtR3aY5V+ItuC0oLSFEeSvvbrux/HDzue+ub6wG1f+OIF3vnyHRVWcXDO8fLil7n53ZtZtnEZJ+x6Aj8d+1N6lfRKdWgZS4VVDEGvxxWLCqvE2NTkFVb6gtq+SGH17pfvxvUZHNt3rPpShEx7rNrXq6QXNx5yY1xtxz00js2N2t4GtXj9Ym565yZeX/o6w3sMZ9ox03JizLREU2EVQxhnPJQWlLaeCSThieyxKilUX5Xt6VncE8O476P74mp/6shTufbAa0OOKrdtbtpMcX6xBlFMkJKCktYfXdK++qZ6/jznz9z30X0U5Rdxxf5XcObuZ7b+KJPOUWEVQxh7rIoLiqlvrtcowiHTIZX29Svrx/OnPt968eggfvrKT1lbvzYBUeW2zU2b1XE9gVRYddyrS17lN+/8hqV1S/nG0G9wWeVl9Cntk+qwsooKqxjCKIQiG9H6pnpKC1UEhCWy8dSX1I71K+tHv7J+gdv1KO7BxsaNCYgoOzS3NLNw7UKaXXOgdss3LtdhwAQqLSzVocB2LNmwhJvfuZkZ1TPYtfuu3DfxPvVJSxAVVjGEWVhtatqkwipEkUOBek0To6ygjI1NKqy25+FPH+bmd2+Oq+2oiuy/LE2qlBaUao/Vdmxp3sJ9H93HvXPuJc/yuGzsZZy1x1k67JdAKqxiCHuPVbZyzvHo/EdZuTn4YJ0Ahw08jL167RWojTqvJ1ZZYVnc72cuqK6rpqSghJsPCV5cDe85PAERCaiw2p5/V/+bm965iSUbljBxyEQur7w8rj3ZEowKqxjCGm4ByOozA6vrqjt1YdkZS2bwyPGPBGrT2nldhwITorSwVIcCd6B2cy29S3pzxOAjUh2KRCkpKGFV/apUh5E2aupquPmdm3l5ycsM6TaEu4++mwN3PjDVYeUMFVYJEvnin/rB1MCnrvcr68f5e52f9te0W1q3FCCuY/V/nvNnfv/+7zns74cFare5aTOGqb9KgpQVlqmw2oHaLbUaiiINlRaWtv7oymUNzQ3c//H93P3h3ZgZP9nvJ5yzxzmtgwZ
LcqiwSpBhPYYxoHwAM5fPDNRuS/MWNjZu5Phhx9O3rG+CogtHTV0NADuX7xy47ckjTmb15tVxDWK5a49ddaZlgpQWeF9Qzrm0L+xToba+loHlA1MdhrRRWlDKui3rmL5oeuC2e/faO2OuyHDn7DuZs3LOdudXra9iad1Sjt7laH5W+TP6l/dPYnQSocIqQQaUD+DZU54N3O7f1f/mwpcupGZjTVILq8/WfsZdH9xFc0vHz3aqWl9FvuXTtzR4nBXFFfx83M8Dt5PEKisso8k10dDSEPiyIpliY+NGfv/+7+Paw1G9oZrRvUYnICrpjP7l/alrrOOq168K3HZM7zE8+I0HExBVuJxz3DvnXnoW96Rfaex+UgPLB3L1+Kv52oCvJTk6iabCKs0M6DoA8Dbg+/bZN2nP+8/P/8nzVc+za49dA7WbNHySriWVRcoKywD4+mNfD7xXcN8++3Lr4bcmIqxQvbXsLR7+9GF6l/QO/NmtKK5QX5U0dP5e53Ps0GNpcS2B2t3+/u2Bjyqkypota2hsaeS7e3037mvYSnLoGzHN7FzmHVa7deatcY2cffDOB3P5/pcHble1ropBXQfxxKQnAreV7HHk4COpWl8V+BDtvNp5vLT4JRqbGynMT+/TuKvWVQEw/cTplBeVpzYYCYWZMaB8QOB2Q7sP5bmq5zLic7ti0wqAuI4QSHKpsEozxQXFXLDPBSxYsyBw26V1S3lw7oM4XOC9DbNWzGLUThpnJ9f1K+vHlQdcGbjd9EXTuer1q6jZWMMu3XZJQGSxLVq7iGc+fwbnXIfbvFHzBr1LequoEvqV9cPheKPmDXYq3ilQWzNjZM+RcXUMr95QzZxV2+8rFcu82nmACqtMvuZErwAAGYVJREFUoMIqDV045sK42lVvqObbz3ybR+c/Glf7g3c+OK52IoO7Dgbgng/vaT2cHcThAw+Pq7D/3czf8Wr1q4GHSDlu2HGBn0uyT+Rze/HLF8fV/tw9z+WyyssCt7vitSsCF1YABXkFDOyqkyfSnQqrLDKw60BePePVVIchOWjXHrtSUVzBU4ueiqv9XbPviut6Zas3r+bM3c7kqvHBOy2LjO07lvsm3hfXeIPTPp7G3+f9nXe/fDdQO4fjk9WfcN5e53Hi8BMDte1W1I2exT0DtZHkU2ElIp3Wtagrr57+Ko6OH5KLWL5xOdM+nkZ9c/CrFORbPmfvcXbgdiLgHc6L93p53bt0554P7wncYR7gqMFH8e1R39bFj7OUCisRCYWZYQQf+6p/eX9+ccAvEhCRSOLs03sf7phwR6rDkDSkURZFREREQqLCSkRERCQkKqxEREREQqLCSkRERCQkKqxEREREQqLCSkRERCQkKqxERETSxaZaCHCJpq2s+QLqVkBL8LG1JDwax0pEJBfVzIK1i8N/3LwC2PVIKCwJ/7GTyTn45EnYuArMIDJGW+R22/87mmeR8d382/mF0GMwtL2m6+Y18NBp0G1n6LNHsHg3fAk173u38wqgvC907Qdd+0f977/1tJKeUbGlCedg8Zvea+Fa4isy++wBvYaHH1sHqbCS9q1aAEtnxp630wgYODa58UhqOOd9DrZsiD2/5y5QMSy5Mc16CGb8hq0GfN/meyJqwlZfIm0W7Mg8y4PdjoW+e3XwcdqGsp3n6DkEBuy3/XZha9gE906E5i2JefyJN8GB8V3zNC6N9bDi4x18DqILnxgLxJq+aj48fn6oYXZYwyZYtyR4u33Phv77wIZlXqG1YRnUfgZVr0P92m2Xz+/iFVjddo5RhEX979K18zm19dkMWL1g2+kr5sK7f+7cYx91HXztks49RifkRmG14EWY3oGLbJbnQXEePHcVTL/Sm7bTrnDOdMjL4aOmj38Pls3e/vzSXpAX7CK4ABzwAzgk+AVM47ZiLtz7dWjcFLxtzyFw4VveL81Mt+FLqFsee97KeTD7IWhq2HbexpWxN4QRJRVw2TwoKAonTvCKuaYdXOpm5l+
guQmGHR7daOv2sabHO2/VfHjj9zuKOD75XeCKReF/gTU1QHOM93Lpe15R9c3fwuADw33Ov54BX7wBo8+g9bVrfT2j7+9oXsBlX7sFZv9viEn4unSDC97w3h9cjFhcm//EntZ23pYNUPdl7OfsMRj67R1+Lo2b/WLry60Lr8jtL+fA/OehceO2bYu6+oVWjKIrUpSV94PC4o7H8tCpsT+bAMOPggn/z9+jF8cetfK+wduEKDcKq/LeMHxCu4tdvPA56ppbmNhcDsMP8XaTfz4DVi+E3iOTEGgaatoCyz+C/b8HB/5o23lzHoNNq4I/7oIXYd6zyS2sls6ELeth3A+gS3nH262YC/OegfVLvQIrE9Svh6d/Cg11W09v2uJ9pnd0fbMu3bxfvW117QejjoMRX2ebjV3NLHjuF3D3YVDQJXi83QdBcbdtpy/7wNvg78jXLoWjrg3+nPFwDtZ+AS3NX93feoGtl401ve28mlnw5A9h+o+91zjQ4+5gXsNG+OSp2F+UEbsfD11D/hIaNA4+ehw+fTrcx23PHpNgzLf9Ox14HzoyvWKYV+hkg8ISqBjq/e3Ilg1bF1/ra7a+v+Rt73+svZ0lPbctvMr7bfuDdN0Sr6g6+c8w7LBtH6e0V0bvzMiNwqr/PjCp/Ws69X1gEr/77FXY91hv+ZXz4M5x8MTk+CrgEV+H/aN2JTc3wfI57XcsLNtp+1/gj5wDn/6zzcT2DkV0Yr5z0NIEQw6JfZhnwjWx42zPkz+ChS/G13b+816RE1FYCnudAvntfJzXLvZ+AX39hmB7VT6b4RVWaxcHL6zWVcODJ3lfckF1Hwhn/hXKegVv+9mr8NFj0HvUtrmOPBZGnwb5sV4Dg10O9DaQQQzYzyvAN64MHmtLs7dHNFKsRCvoAgdetP3XIK8A9vlW8OeMl1n4xXXPId5esG3WB4t5c9t5O1if++0Fu31j2748AD0GhV9UAUy4FgaNjx1bdF+j1vttD9PFuN/ePMvztrclPcLKInd16er99Rqx/WWc8/pARe/xav3v3145z7vtYqzXAEXlMOKo4NuaDJAbhVVHFZZu/X+nEd6voDVV3gcliI2rYdHLMO9fX01bNd/7tdue/CLvkEppxdbTG+vh02dg0AHe31Z2dChjO8vEXC7GMoWlMHJiO0EH1GOwtyu8YaO/m72D1nwOfz1t2+n5BV5xtXE1rK+O3fbLOdB15+CHqiK/WKvfDV5gf/QP730ffWb7hV80B3z4d/jHZBh5TLDnBO8QmeXD5FeS04m4oAuceFfinycbFRbDj95KdRTh6bkLHDA51VFIIpl530+lFdB3z+0v19LsneXY0rTtvC7liem7lQZUWEUr8I8PR44T5+XB6Q/E91hrF8MTP4RNq7+aVtYbxp7bpuNrG6sXeodUPnnyq74P65bC67d61X9LI4y/0Dskk8l67uL9v3Hn+Nr/4N/e64mDqQd7/eLemgo1s73XaHuGxtjt3J5uA7zi76VfeX9B7TQCTv5T8Ha9R8IL/w8WvRS8LUD/MZl/ZpaIZK68fK8rTo5RYRUtUlgVhPBl1GMwnPdM8HaNh8BL13n9Y6JZnrdLf/CBbTrqZqjdvwlHX+/1+Qlqp12h/+iv7h95Fcz1+3OMOt4rOgu204my/5jgz1dQBOf9C9ZWBW8L0G90+8vEcvBPYL/vxD5EtkPO28vafWB8zysiInFTYRUtcty+o2c2JEJhCXz3We+LMVr/Me13OswkXbrCwT8O57H2/573l0gDx6ZmWIl4+4zE0y9LREQ6TYVVtEh/ozD2WHXGzvt6fyIiIpJRMvd8xkRK5R4rERERyVgqrKJFDgXmaUeeiIiIBKfCSkRERCQkCSmszOwYM5tnZgvNbEoiniMhtrn8gIiIiEjHhV5YmVk+cCdwLLAH8C0zC3iZbhEREZHMk4g9VuOAhc65z5xzDcDfgEkJeB4RERGRtJKIXtoDgCVR96uBttdfwcwmA5MBBg9Ok4tcHj7
FuzjkHiekOhIRERHJQInYY7XN5UKJcQE659zdzrlK51xl795pMuR9z13g3KehuHuqIxEREZEMlIjCqhoYFHV/IFCTgOcRERERSSuJKKzeBUaY2VAzKwLOBKYn4HlERERE0krofaycc01mdhHwHJAP3Oec+zjs5xERERFJNwkZYtw59wzwTCIeW0RERCRdaeR1ERERkZCosBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiQorERERkZCosBIREREJiTnnUh0DZrYS+CLBT9MLWJXg50gXuZBrLuQIyjPbKM/sojyzS5A8d3HO9Y41Iy0Kq2Qws/ecc5WpjiMZciHXXMgRlGe2UZ7ZRXlml7Dy1KFAERERkZCosBIREREJSS4VVnenOoAkyoVccyFHUJ7ZRnlmF+WZXULJM2f6WImIiIgkWi7tsRIRERFJKBVWIilmZpbqGEREckEytrdZVVjl0heUmWXVexeLmQ1IdQwiEpu2t9lF29vwZNuHJdvy2YqZnWBml6Y6jkQzs6PMbCbww1THkkhmdryZ/Q2YYma7pDoe6Twz2y0XvoR9WZ2ntrfZJZnb26xYMczsm2b2NHC9mR2c6njCZmYFZvZz4HbgFjMb45xrMbP8VMcWFvMUmdldwC3A9c65a6Lnpy668JnZUcA1wDSgALjYzL7pz8uK9TLCzE40s+tTHUcimdnRZvY28D2yZLu6PdreZj5tbxO7vc34DYCZjQWuBe4CPgS+Y2bn+vMyPj8A51wTMA/YHbgU+JM/vTmVcYXJeRqAUuBJ59yTZpZnZvtE5qc2wtAdBTztnHsW7/3sCnzXzMqccy2pDa3z/A13vpl9D2/DPcXMDkl1XGHycyw0s1/hbX9uds79zF9fs+7LCbS9TWVcYdL2NrHb22xYEY4C/u2cewZ4CvgSrxrt7v/KyMiNm5n92Mx+Y2an+5P+6Zyrd879DuhjZv/lL1eYuig7LyrPM/xJ1wOHmNktwPvADWZ2t5lNTF2UnRfj/fwPcJCZFTvnVgD1QD5wXsqCDJG/4W4GFgL7AhfivbdZw8+xEWgBHnPO/QPAzA7J9PVyB7S9zWDa3iZne5txhZX/gt1jZt/3J70CHGdmPZ1zm4FGYD1wBWRe5e3/Cv4pcAbwHnCd/4uwZ9RilwL/A+Bv2DNOjDx/aWbnO+cWAU/i/Vo8A/gv4CPgJDPrlbKA47Sd9/M7wKfAMuARM3sF6Ib3RdUtk3/5R62f3/MnzXDObXDO3QOUmdn5/nLZkONkf9Ifgf5m9hczm4O37bkX+K6/fEYWG6DtrU/b2wyRNttb51zG/AHnAm8BxwAzgKuAPsAfgKeBfwN/ASYCU4GyVMccZ57TgSP828cAtwFnt1nmFeBy//ZRqY45pDxvB07375dHLXco8FegNNUxh5Dnsf77eRreL6Z9gW/6884C7kl1vJ3Is+36+Qtg16j5xwIfAz1THWuIOV6N9yV8IvAQ3heUAZOAfwKDUx1ziLlqe+u0vU33v3TY3mbar8YJeP0YngUuA4qBc5xzF+MdaviVc+48vN18xc65jakLtX1tf8lGVc7vAYcA+LnOB/Y0s92iFr8A+G8z+xJI69NkA+Q5F9jPzHZzztVFNTka2IT3vqatDub5L7z3sxKv6JjlnPunv9xY4O0khZsIsdbPsyIz/dznApPNrKuZnZaaMDulbY5dgB84554EJjvnPnXeVvtDYC3eHp1MlVXb27aydXvbVrZub9tKp+1tRhRWUS/YLOA4AOfce8AbwAgz+5pzbrFz7gV/uW8Ci5IfaWAl0XfcV53oFgJdzWxv//4MoDtehzvMbAxwD/A4sJ9z7v7khBu3ePM808w+AnYBrnTp36k7SJ7d/D/M7Btm9g5eno8nKdbQ7GD9fBPY2bY+c+znwE3AAqB
fMuPsjHa2QUPN7OA2hcV38D4Pa5IaaAiydXtr/ll9kR9A2bq97USeGbW9DZhnUre3aVlYmdnBZrZr5H7UC/YGkGdmh/r3PwJqgP5+u0PNbAYwAq/fQ1oys/Fm9jhwp5l9PeoDUuAv8g7QDBxtZgXOuU/wfiVV+vNXAxc6505zztUkO/6OCiHPL4ALnHPnOK/DYVrqRJ77+/MXAD90zp3inEv7L+IdbNBirZ/LgJ395YfjnU32JN4X1B+SGXcQAXOs4ascTzGzD4BheJ/dtP/VH0eumba9PdjM7geuNrMKf49idEf0bNnedjbPTNnexptn0ra3aVVYmdl+ZvY88DJeJR2ZHolzAV4fjTPMLN85V433q3eoP78KbwU4yTm3KnmRd5yZHY735fIPvFN6vw30NLM855+m7ZxbCLwLDAem+E234H3wcc4tcc7NSXLogYSU55vOuX8nOfRAOplnlT9/gXPu/eRGHpyZHWhm9wA/NbNuURu0SAG5vfVziD9/HXCRc+7kdP2C6kSOkW3QfLyN9jnOueXJjj+IEHKtIv23t8Pw1s9X8PZQXG9m34CvOqJnyfY2jDwzYXvbmTyr/PkJ396mRWFl3lgwfwLuxutQ9xxwuD8vP+oX1Aa8DpNFeAO3FeJ1HF0F4O+e/jjJ4Qc1GnjXOfcQ8L9AIVAXydHMbjCze4GZeK/FOPNGxa3Fe10yhfIke/L091rcgfejZ2fgF2b2dWgd9we2v36u9pdb6ZxbkOzYO6qTOUa2QXOcc28mO/agQso1E7a344C5zrlpwOXAbOB4M4vsdcuK9RPlmV55uvToxV+O18m1xL9/Lt4x7YKoZa4DHsU766Y/3giqb+EN9pWf6hx2kNt4YGTU/TF4b/K1wHLgVeA+vNNDD8I7G2N4m9emR6rzUJ65lWeMvC8FbvdvV/g5/gXo60+7IRPXz1zLMdtzBY4HLgLG+/eH4R3SHOzf3wOvj98lwNcydf1UnumdZ8r2WPn9Ukb6dzc65x5y3rgo4J0W2eycazLPaLzj+FOcd9bNMrwxYo52zv3ApeGIuGbWw8z+CbwAnG5m5QDOudl4p7rugrcb/XC8D8pReK/DfznnFkYOfzrn6pxza1OSRAcoz+zKM6LN+gneYc7uZrazc64WqMM7I26Sv9wwMmj9hNzIMSLbczWz/mb2f8DP8Paq/cXMJjrnPsM7iSJyBuo84BO8riZzMm39VJ6ZkWfSC6sYX1BlzjnnF1CReGbgDVDW03llZ+QFWxT1grU45zYkO/4AyvB2PV7s3269nIdz7h2gN/6xbbzd8T3wzyDy++ek9RkZUZRnFuW5vQISr7/NemCaeR31B+GdNdbNOTc/k9bPXMgxIodyrQRed84d6py7Hvg9EBnA9XVgbzM7wC8KlwKHOufWQWatnyhPyIA8U7HHqu0X1KHQenmIFn9FrvKXOSwyD9LjBdsRMzvHzA7zO4Iuxesz9gjeeCAHmFnkzKEueEPsX+g3nYC3O74etjorJy0pz+zKs43trZ/z8cYyugl41Dl3Et5ZYkdEGqb7+hklF3KMyNpc/fXzcH/9ewl4IGr2arwTCcA7hDkLuM0vLPcEvjCzUkj/9VN5AhmWp/k1S2KfxOwcvF/zs5xz682sGK+o+xneKMV3O+dqzMz8vVfleD3//+qcezYyPeGBxsHMDO9Mmb/iXTNsEd4G7CfOP1PGvHF8Tgfec8496E/bE69fTj+8QQQvcs7NTX4GHaM8syvPaB1dP2O0uxpY65y7I6kBxyEXcozI5lzbWz/NrNA512hmPwb2cM79MKrtrcBAvMP25zjn5iU/g45Rnhmep0tcpzPD6/T4Cl71eTfeJR96RS1zMN4uvm9HTcv3/z8I/DJR8YWUYyTWkcD/+rcL8C758I82y/4Ur0NoD77qpF8CDEt1Hsozt/L0Y41r/fSnfw3vrJvngCGpziWXc8ylXDuyfkYt83/4l54B+kQt2zXVeSjP7M8zIYcCzRsiweGN6Lr
UOTcB7zBJLd4KD4Bz7g28w367m1l3Myt1X3WM/K5z7peJiK+zzKzAzG4EbjSzw4Dd8AYkw3mnKv8YONCfF3EP3hkKLwBVZjbAObfZeZ3x0pLyzK48Izqxfpb5sz4DrnHOTXTOVSU1+A7KhRwjsj3XIOunc67ZzIqAlcB8M/s18IJ5/XWbXBr3E1Oe2ZNnqIVVCF9Qn0f6rbg0vYq4H/tMvDMVFgLX4x36OcLMxkFrn7BfAb+MavpNvI3dbGBv5/XZSVvKM7vyhFDWz8/MbKBzrsY590ySw++QXMgxIhdyDbh+Xuc3K8YbsuclvGLzKJfmVzRQntmVZ5i79Q4DPsC7yvn3gdfwTkNfDIyLWu4C4JWo+2cADXgrfJ9k77KLI89DiLryOV5fsAvw3viZ/rQ8vOPGj+DvWse72v2hqY5feeZsnlm/fuZCjrmWaxzr50C8QSQfAMakOn7lmaN5pvAFG+JPy7QvqFK88V4ix37PAm7yb88GLvZvVwIPpzpe5ak8/fizfv3MhRxzLdeA6+ffUh2v8lSezoXbx2om8Ij5F/Tkq9FRpwH5Znax806DHIg3+GcVgHPuKefcayHGkVDOuU3OuS3uq75gR+Md/wU4DxhlZk8DDwPvw1cXN80kyhPIojzJjfUzF3KMyIlcA66fMyEz10/lCWRRngXtL9IxzrlNbSYdDXzo3z4P+L7/gu2G36HSLH2HUWiPv0FzQF9guj95A3AlsBfwufP73WRqjqA8yZI8c2H9zIUcI3IpV8j+9TNCeWZHnqEVVhHZ/oJFacG7OOkqYLSZ/Q5vELOLnXOvpzSycCnPLMozF9bPXMgxIodyzYn1E+WZFXmGXliR5S9YhHPOmdm+eMeIhwJ/cc7dm+KwQqc8s04urJ+5kGNETuSaK+un8swOCRl53czG413i4z9k2QsWzcwGAmcDtzrntqQ6nkRRntklF9bPXMgxIldyzaH1U3lmuEQVVln7golkulxYP3Mhx4hcylUkEyTlWoEiIiIiuSAhl7QRERERyUUqrERERERCosJKREREJCQqrERERERCosJKRDKamf3SzC7fwfwTzWyPZMYkIrlLhZWIZLsTARVWIpIUGm5BRDKOmV0FnAMswbuI60xgHTAZbyTyhXhjO40BnvbnrQNO8R/iTqA3sAn4vnPu02TGLyLZS4WViGQUMxsLTAMOwLss1/vAH/FGHV/tL3MDsNw59wczmwY87Zx7zJ/3EvBD59wCMzsAuMk5d2TyMxGRbJSIawWKiCTSIcATzrlNAGYWufjwXn5B1QMoB55r29DMyoGDgEfNLDK5S8IjFpGcocJKRDJRrF3t04ATnXMfmNm5wOExlskD1jrnxiQuNBHJZeq8LiKZ5jXgJDMrMbOuwPH+9K7AMjMrBM6KWn6DPw/n3HrgczM7DcA8+yQvdBHJdupjJSIZJ6rz+hdANfAJsBG4wp82B+jqnDvXzA4G7gG2AKcCLcBUoD9QCPzNOferpCchIllJhZWIiIhISHQoUERERCQkKqxEREREQqLCSkRERCQkKqxEREREQqLCSkRERCQkKqxEREREQqLCSkRERCQkKqxEREREQvL/AdP8dM+Na56uAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "mean_df.loc[:,actual_col_list].plot.line(title='Avg power generated across regions',figsize=(10,10))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "similarly, we can plot, daily power generated across all regions" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "(power_df\n", + " .groupby('date').sum()\n", + " .loc[:,actual_col_list]\n", + " .plot.line(\n", + " title='Total power generated across all regions', \n", + " figsize=(10,10))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "4g9NzXApq4kP" + }, + "source": [ + "## With data cleaning and some basic eda done, what other questions would would you like to get the answer for?\n", + "\n", + "## Come up with atleat 3 such questions and find the anwers for the same individually before the session ends.\n", + "\n", + "## Try forming as many questions and their answers later as per your availability and share your notebooks on slack." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "tmEQ2aV3q5he" + }, + "source": [ + "## Some ideas\n", + "- Clustering, [example in R](https://www.kaggle.com/aishwaryasharma1992/exploratory-data-analysis-with-k-means-clustering)\n", + "- Additional plots, [example using Plotly](https://www.kaggle.com/shakka/eda-and-visualization-using-pandas-plotly)\n", + "- Regression, [example linear reg](https://www.kaggle.com/kuroganedecimo/power-generation-eda-linear-regression)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "daily_power_generation.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Data Exploration of Automobile 
Data/notebook/Data Exploration of Automobile Data_MK.ipynb b/Data Exploration of Automobile Data/notebook/Data Exploration of Automobile Data_MK.ipynb new file mode 100644 index 0000000..1ce7d4c --- /dev/null +++ b/Data Exploration of Automobile Data/notebook/Data Exploration of Automobile Data_MK.ipynb @@ -0,0 +1,2029 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

\n", + " \n", + " \n", + " View in Colab\n", + " \n", + "

" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Resources used in the session:\n", + "* [What is EDA?](https://www.itl.nist.gov/div898/handbook/eda/section1/eda11.htm)\n", + "* [Categorical Encoding](https://pbpython.com/categorical-encoding.html)\n", + "* [Pandas Visualization](https://pandas.pydata.org/pandas-docs/stable/user_guide/visualization.html)\n", + "* [Data Science Lifecycle](http://sudeep.co/data-science/Understanding-the-Data-Science-Lifecycle/)\n", + "* [TDSP-documentation](https://docs.microsoft.com/en-us/azure/machine-learning/team-data-science-process/lifecycle)\n", + "* [TDSP-presentation](https://slideplayer.com/slide/13392497/)\n", + "* [ML Code in production system](https://images.anandtech.com/doci/14466/DataPipelineSculley.png) or [full article](https://www.anandtech.com/show/14466/intel-xeon-cascade-lake-vs-nvidia-turing)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Exploration of Automobile Data\n", + "\n", + "Cars contain 50 or more sensors which collect data on speed, emissions, fuel consumption, usage data for resources, and security. All these data can be used to find patterns and resolve quality issues either in the nick of time or prevent them from happening altogether. Analytics is being used to increase both customer satisfaction and quality management at a cost-effective level. 
In this session we are doing some basic analysis of automobile data which the learners can further expand on.\n", + "\n", + "\n", + "#### Data Set 1:\n", + "\n", + "Cleaned data.\n", + "\n", + "#### Data Set 2:\n", + "\n", + "Raw data with missing values.\n", + "\n", + "#### Data Description:\n", + "\n", + "[Source: UCI ML Repo](https://archive.ics.uci.edu/ml/datasets/Automobile)\n", + "\n", + "This data set consists of three types of entities:\n", + "\n", + "(a) the specification of an auto in terms of various characteristics\n", + "\n", + "(b) its assigned insurance risk rating\n", + "\n", + "(c) its normalized losses in use as compared to other cars.\n", + "\n", + "The second rating corresponds to the degree to which the auto is more risky than its price indicates. Cars are initially assigned a risk factor symbol associated with their price. Then, if a car is more risky (or less), this symbol is adjusted by moving it up (or down) the scale. Actuaries call this process \"symboling\". A value of +3 indicates that the auto is risky, -3 that it is probably pretty safe.\n", + "\n", + "The third factor is the relative average loss payment per insured vehicle year. 
This value is normalized for all autos within a particular size classification (two-door small, station wagons, sports/speciality, etc...), and represents the average loss per car per year.\n", + "\n", + "Note: Several of the attributes in the database could be used as a \"class\" attribute.\n", + "\n", + "\n", + "No of instances : 205\n", + "\n", + "No of attributes : 26\n", + "\n", + "\n", + "\n", + "\n", + "#### Attribute Information:\n", + "* mpg: continuous\n", + "* cylinders: multi-valued discrete\n", + "* displacement: continuous\n", + "* horsepower: continuous\n", + "* weight: continuous\n", + "* acceleration: continuous\n", + "* model year: multi-valued discrete\n", + "* origin: multi-valued discrete\n", + "* car name: string (unique for each instance)\n", + "\n", + "Missing Attribute Values: normalized-losses and horsepower have missing values\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import seaborn as sns\n", + "from sklearn.preprocessing import Imputer,LabelEncoder\n", + "from scipy.stats import norm, skew\n", + "import matplotlib.pyplot as plt\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part A: EDA \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 1: Load the data stored in `data_1` using `.read_csv()` api.\n", + "Get an overview of your data by using `info()` and `describe()` functions of pandas." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
symbolingmakefuel-typebody-styledrive-wheelswheel-baselengthwidthheighthorsepowerpeak-rpmhighway-mpgcity-mpgprice
03alfa-romerogasconvertiblerwd88.6168.864.148.81115000272113495
13alfa-romerogasconvertiblerwd88.6168.864.148.81115000272116500
21alfa-romerogashatchbackrwd94.5171.265.552.41545000261916500
32audigassedanfwd99.8176.666.254.31025500302413950
42audigassedan4wd99.4176.666.454.31155500221817450
\n", + "
" + ], + "text/plain": [ + " symboling make fuel-type body-style drive-wheels wheel-base \\\n", + "0 3 alfa-romero gas convertible rwd 88.6 \n", + "1 3 alfa-romero gas convertible rwd 88.6 \n", + "2 1 alfa-romero gas hatchback rwd 94.5 \n", + "3 2 audi gas sedan fwd 99.8 \n", + "4 2 audi gas sedan 4wd 99.4 \n", + "\n", + " length width height horsepower peak-rpm highway-mpg city-mpg price \n", + "0 168.8 64.1 48.8 111 5000 27 21 13495 \n", + "1 168.8 64.1 48.8 111 5000 27 21 16500 \n", + "2 171.2 65.5 52.4 154 5000 26 19 16500 \n", + "3 176.6 66.2 54.3 102 5500 30 24 13950 \n", + "4 176.6 66.4 54.3 115 5500 22 18 17450 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('../data/data_1.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 205 entries, 0 to 204\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 symboling 205 non-null int64 \n", + " 1 make 205 non-null object \n", + " 2 fuel-type 205 non-null object \n", + " 3 body-style 205 non-null object \n", + " 4 drive-wheels 205 non-null object \n", + " 5 wheel-base 205 non-null float64\n", + " 6 length 205 non-null float64\n", + " 7 width 205 non-null float64\n", + " 8 height 205 non-null float64\n", + " 9 horsepower 205 non-null int64 \n", + " 10 peak-rpm 205 non-null int64 \n", + " 11 highway-mpg 205 non-null int64 \n", + " 12 city-mpg 205 non-null int64 \n", + " 13 price 205 non-null int64 \n", + "dtypes: float64(4), int64(6), object(4)\n", + "memory usage: 22.5+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: Seems no null values" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": 
[ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
symbolingwheel-baselengthwidthheighthorsepowerpeak-rpmhighway-mpgcity-mpgprice
count205.000000205.000000205.000000205.000000205.000000205.000000205.000000205.000000205.000000205.000000
mean0.83414698.756585174.04926865.90780553.724878104.9365855127.80487830.75122025.21951213476.258537
std1.2453076.02177612.3372892.1452042.44352240.609702478.4140076.8864436.5421428114.166248
min-2.00000086.600000141.10000060.30000047.80000048.0000004150.00000016.00000013.0000005118.000000
25%0.00000094.500000166.30000064.10000052.00000070.0000004800.00000025.00000019.0000007788.000000
50%1.00000097.000000173.20000065.50000054.10000095.0000005200.00000030.00000024.00000010595.000000
75%2.000000102.400000183.10000066.90000055.500000116.0000005500.00000034.00000030.00000016558.000000
max3.000000120.900000208.10000072.30000059.800000288.0000006600.00000054.00000049.00000045400.000000
\n", + "
" + ], + "text/plain": [ + " symboling wheel-base length width height horsepower \\\n", + "count 205.000000 205.000000 205.000000 205.000000 205.000000 205.000000 \n", + "mean 0.834146 98.756585 174.049268 65.907805 53.724878 104.936585 \n", + "std 1.245307 6.021776 12.337289 2.145204 2.443522 40.609702 \n", + "min -2.000000 86.600000 141.100000 60.300000 47.800000 48.000000 \n", + "25% 0.000000 94.500000 166.300000 64.100000 52.000000 70.000000 \n", + "50% 1.000000 97.000000 173.200000 65.500000 54.100000 95.000000 \n", + "75% 2.000000 102.400000 183.100000 66.900000 55.500000 116.000000 \n", + "max 3.000000 120.900000 208.100000 72.300000 59.800000 288.000000 \n", + "\n", + " peak-rpm highway-mpg city-mpg price \n", + "count 205.000000 205.000000 205.000000 205.000000 \n", + "mean 5127.804878 30.751220 25.219512 13476.258537 \n", + "std 478.414007 6.886443 6.542142 8114.166248 \n", + "min 4150.000000 16.000000 13.000000 5118.000000 \n", + "25% 4800.000000 25.000000 19.000000 7788.000000 \n", + "50% 5200.000000 30.000000 24.000000 10595.000000 \n", + "75% 5500.000000 34.000000 30.000000 16558.000000 \n", + "max 6600.000000 54.000000 49.000000 45400.000000 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: No cars have -3 symboling rating,\n", + "\n", + "looking for skewness?\n", + "* med > mean = right \n", + "* med < mean = left\n", + "\n", + "**price distribution is left skewed**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 2: Plot a histogram showing the distribution of the car prices (target variable)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(df.price)\n", + "plt.title('Histogram of car prices with KDE')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Same plot with Pandas\n", + "df.price.plot.hist(title=\"Histogram of car prices\")\n", + "plt.xlabel('price')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Let's define the problem statement: \n", + "\n", + "**Predict the price of the budget cars i.e. price < \\\\$20K**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 3: Plot a countplot of the 'make' column of the dataset which represents the different car makers." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAb4AAAEWCAYAAAAZwvJqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3debxd0/3/8dfb1JCQIKHmkBhqDAktDcU3VVU1VMxVQ1s1tKq+qJ+qUt8q5Vttv75oKGKoKTFrDY0hhIhEIgMhRXyNMWWk0Ug+vz/WOsnOzbn3npucO5738/G4j7vPOmvvvfY5j9xP1t5rfZYiAjMzs1qxXGs3wMzMrCU58JmZWU1x4DMzs5riwGdmZjXFgc/MzGqKA5+ZmdUUBz6zdkjSVEkDWvicu0p6uYrHu17Sf1XreHWOfZ6km5rj2Nb+OfCZZZKOkDRa0hxJ70r6u6T+rd2utiIinoyIzZdmX0nHSHqq2m2qhuYMwK1xHmucA58ZIOk04A/AhcDawIbAFcD+S3GsFarbuubTntpqVi0OfFbzJHUFfg2cHBF3RsQnETEvIu6LiDNynZ0kPSNpRu4NXi5ppcIxQtLJkqYAU8qco2euc6ykNyVNl3SCpB0ljc/HvbxQv5ekRyV9JOlDSTdL6lZP+7eQ9Lqkw/LrdSUNlfRBLj+lUPc8SUMk3SRpFnBMvrbRkmZJmibp9/WcZ3dJbxVeT5V0em7/TEm3SepUZr8vAVcBO+fe9IzC26tLekDSbEnPSupV57oekfSxpJclHVKuXbnuxpKeyMd5BOhe5/07JL2X2zlc0la5/HjgSODM3Lb7cvlZkl7Nx3tR0oGFY/XO55qZv5vbGmtzfeexVhIR/vFPTf8AewOfAys0UKcv8BVgBaAn8BJwauH9AB4B1gBWLrN/z1znKqATsBcwF7gbWAtYD3gf+Fqu3xv4OvAFoAcwHPhD4XhTgQHADsD/Afvm8uWAMcC5wErAJsBrwDfy++cB84ADct2VgWeAo/L7XYCv1PMZ7A68VacNo4B183W/BJxQz77HAE/VKbse+BjYKX+uNwO35vc6A28Cx+b3dgA+BLaq5/jPAL/Pn9duwGzgpsL7xwGr5vf/AIyr047/qnO8g/N1LQccCnwCrJPfuwX4RX6vE9C/kjaXO49/WufHPT4zWBP4MCI+r69CRIyJiJER8XlETAX+DHytTrXfRsTHEfGvBs51QUTMjYiH
SX9Mb4mI9yPibeBJYPt8vn9GxCMR8VlEfED6o173fLsC9wJHR8T9uWxHoEdE/Doi/h0RrwFXA4cV9nsmIu6OiAW5rfOA3pK6R8SciBjZQPvr+lNEvBMRHwP3AX2asC/AnRExKn/2Nxf23xeYGhHX5c/8eWAoMLDuASRtmK/7l/nzGp7bslBEXBsRsyPiM1Lw3y739MuKiDvydS2IiNtIvfid8tvzgI2AdfN3WXp2WXGbrXU58JnBR0D3hp53SdpM0v35dtks0rPA7nWqvVnBuaYVtv9V5nWXfL61JN0q6e18vpvKnO8E4OmIeKxQthGwbr51OiPfVjyb9NyyvnZ+H9gMmCzpOUn7VnAdJe8Vtj8ttb8K+28EfLnOdRwJfLHMMdYFpkfEJ4WyN0obkpaXdFG+dTmL1FOFJT9PCvt8T9K4wrm3LtQ/ExAwStIkScctRZutFTnwmaXbZHNJt//qcyUwGdg0IlYjBRPVqVPNpU5+m4+3bT7fd8uc7wRgQ0mXFcreBF6PiG6Fn1UjYp/62hkRUyLicNIt14uBIZI6V/FaljhnBd4EnqhzHV0i4sQydd8lPSsstnnDwvYRpEFKA4CupNvOsOjzXKxtkjYi9ZJ/DKwZEd2AiaX6EfFeRPwwItYFfgRcIal3BW32UjhthAOf1byImEl6Jva/kg6QtIqkFSV9U9LvcrVVgVnAHElbAOX+AFfTqsAcYIak9YAzytSZTXo+uZuki3LZKGCWpJ9LWjn3draWtGN9J5L0XUk9ImIBUBp4Mr96lwKknu36KgwIasT9wGaSjsrfxYpKA4G+VLdiRLwBjAbOl7SS0hSUbxeqrAp8RurZr0Lqrddt2yaF151JQeoDAEnHknp85NcHS1o/v5ye686voM11z2OtxIHPDIiI3wOnAeeQ/uC9Sfof/925yumknsNsUm/gtjKHqabzSYMjZgIPAHeWqxQRM0iDYL4p6YKImE/6o98HeJ00uOIaUk+nPnsDkyTNAf4IHBYRc6t1IdmjwCTgPUkfNlY5ImaTBgAdBrxDuiV6MWlwSjlHAF8mDZb5FXBD4b0bSLc+3wZeBOo+w/wLsGW+PXl3RLwI/DfpTsA0YBtgRKH+jsCz+fO6F/hpRLxeQZsXO09jn4E1H0W4921mZrXDPT4zM6spDnxmZlZTHPjMzKymOPCZmVlNcYLadqB79+7Rs2fP1m6GmVm7MmbMmA8jokfdcge+dmD9zqvx9++f2trNMDNrUT1O/O4y7S/pjXLlvtWZSeom6aQqH7OPpH0ar2lmZi3FgW+RbkBVAx9pErEDn5lZG+LAt8hFQK+cmPaS/DNR0gRJhwJIulHSwoVJldZI209SJ0nX5bpjJe2RUzP9Gjg0H/NQpXXPns51npa0VKtZm5nZ0nPgW+Qs4NWI6ENKadQH2I6U2PYSSeuQUj8dCwsXL90F+BtwMkBEbAMcDgwmfbbnArdFRJ+8tMlkYLeI2D6/Vzdn4EKSjldaHHT0R3NmNcf1mpnVJAe+8vqT1kmbHxHTgCeAHSPiCdK6ZWuRAtzQvI5Yf+BGgIiYTMoLuFmZ43YF7pA0EbgM2Kq+BkTEoIjoFxH91uyyWjWvzcyspjnwlVd3+ZeiG0lrbB0LXFdB/aILgMciYmtSIuFOS91CMzNbKg58i8wmLV8CMJz0bG55ST2A3UjLvQBcD5wKEBGTCvWPhLRgKWktsJfrHBNSj+/tvH1Mc1yEmZk1zPP4soj4SNKIfBvy78B44AXSWltnRsR7ud40SS+xaLkagCuAqyRNAD4HjomIzyQ9BpwlaRxpYdHfAYMlnUZapqUiK/RYY5nns5iZWeJliZpI0irABGCHvIBps+vXr1+MHj26JU5lZtZhSBoTEf3qlrvH1wSSBgDXAr9vqaAHMO+D95h25UWNVzRr49Y+8azWboKZA18lJP0aGB4R/yA9vzMzs3bKga8CEXFua7fBzMyqw6M6CyT1lPSSpKslTZL0sKSVJV0vaWCuc5Gk
FyWNl3RpLjs4Z3l5QdLwwrGelPR8/tkll+8u6XFJQyRNztlfKp0OYWZmy8g9viVtChweET+UdDtwUOkNSWsABwJbRERI6pbfOhf4RkS8XSh7H/h6RMyVtClwC1B6yLo9afL6O8AI4KvAU8VGSDoeOB5g/TW6YWZm1eEe35Jej4hxeXsM0LPw3ixgLnCNpO8An+byEcD1kn4ILJ/LVgSuzlMc7gC2LBxnVES8FRELgHF1zgEsnrlljS6dq3NlZmbmwFfGZ4Xt+RR6xTk92U7AUOAA4MFcfgJwDrABME7SmsDPgGmkfJ/9gJUqOYeZmTUv/8FtAkldgFUi4m+SRgL/zOW9IuJZ4FlJ3yYFwK7AWxGxQNLRLOoJmplZK3Lga5pVgXskdSLl5/xZLr8kP8cTMIyU8eUKYKikg4HHgE+W9qQr9vii5z+ZmVWJM7e0A87cYmbWdM7c0o599v4/efV/9m+8olkr6fWTe1q7CWYV8+CWFlCcB2hmZq3Lgc/MzGqKAx8Ls6xMlnRNzsBys6QBeZmiKZJ2yj9PSxqbf2+e971G0rj884GkXym5PGd4eQBYq3CucyU9l88zyFlbzMxalgPfIr2BPwLbAlsARwD9gdOBs4HJwG4RsT0pU8uFABHxg4joA+wPfERaqPZAYHNgG+CHwC6F81weETvmVdhXBvYt1xhJx0saLWn0x3P+XeVLNTOrXQ58i7weERNyNpVJwLBIQ14nkDKrdAXuyAvVXkZKOQZAnt5wB/DjiHiDtGL7LRExPyLeYfFFZ/eQ9GzO6LJn8ThFi2duWalcFTMzWwoOfIsUs6ksKLxeQBr9egHwWO6pfRvoVKh/FXBnXraoZIl5IjlAXgEMjIhtgKvrHMfMzJqZA1/lugJv5+1jSoWSTgZWjYjiSrHDgcMkLS9pHWCPXF4Kch/mLDAe6Wlm1sI8j69yvwMGSzqNxW9dng7Mk1RKbH0V8GfSbcwJwCvAEwARMUPS1bl8KvBcJSf+wlq9PU/KzKxKnLmlHXDmFjOzpnPmlnZszgf/5OlBZQd/Wo3Y5fj7W7sJZh2Gn/HVkef0TWyG4z4uaYn/eZiZWcty4DMzs5riwFfe8pKuljRJ0sOSVpbUR9JISeMl3SVpdVjYk7tY0ihJr0jaNZevLOnWXP820mR18ntX5snpkySd30rXaGZWkxz4ytsU+N+I2AqYARwE3AD8PCK2JY3K/FWh/goRsRNwaqH8RODTXP83QN9C/V/kB67bAl+TtG3dBhQzt8xw5hYzs6px4Cvv9YgoTU8YA/QCukXEE7lsMCk7S8mdhbo98/ZuwE0AETEeGF+of4ik54GxpMwtW9ZtQDFzSzdnbjEzqxqP6iyvmMVlPtCtwvrzWfwzLZe9ZWPS3L8dI2K6pOtx9hYzsxbjHl9lZgLTS8/vgKPIk9IbMBw4EkDS1qTbmgCrAZ8AMyWtDXyz+s01M7P6uMdXuaOBqyStArwGHNtI/SuB6ySNB8YBowAi4gVJY0mJsF8DRjR24i49ensel5lZlThzSzvgzC1mZk3nzC3t2MwPp3D/tb4jWsv2Pe7vrd0Esw6jwz/jk7SfpLPy9gGSlhhBWeFx6s3okldhr/e49WVtkdRP0p+Wpj1mZrZ0OnyPLyLuBe7NLw8A7gderPI5frCU+40GfA/TzKwFteseX+6FTc49romSbpY0QNIISVMk7STpGEmXS9oF2A+4RNI4Sb0knSLpxZxd5dZ8zPMknV44x0RJPfPLFSQNzvWH5IEuC3t0ef296/M+EyT9rNDcg8tkd9ldkketmJm1oHYd+LLewB9J0wW2AI4A+pPmyp1dqhQRT5N6fmdERJ+IeBU4C9g+Z1c5oYJzbQ4MyvVnASfVeb8PsF5EbJ1XWL+u8F657C71KmZumenMLWZmVdMRAt/rETEhIhaQpggMizRUdQKLsqjUZzxws6TvAp9XcK43I6I0/eAmUoAteg3YRNL/SNqbFBxLymV3qVcxc0tXZ24xM6ua
jhD4illWFhReL6DxZ5jfAv6XlEdzjKQVSAGw+LkUs6rUnfux2OuImA5sBzwOnAxcU6addbO7mJlZC+oIga8pZgOrAkhaDtggIh4DziSlJesCTAV2yHV2ADYu7L+hpJ3z9uHAU8WDS+oOLBcRQ4Fflo5jZmZtR631PG4FrpZ0CnAY8BdJXQEBl0XEDElDge9JGgc8B7xS2P8l4GhJfwamkLKzFK1HytZS+g/F/6tGo7t239TzuMzMqsSZW9oBZ24xM2s6Z25pxz766BUGX79XazfDmsnRxzzc2k0wqykd5hlfMbNKae5ea7fJzMzang4T+MzMzCrRpgOfpIslnVR4fZ6k/5R0SSE7yqGNHONbkp6R1F3SwXm/FyQNz+//TdK2eXuspHPz9gWSfiCpi6Rhkp7P59u/cOxf5swxj0i6pZTxJWeFeVDSGElPStoil18v6U+Snpb0mqSB1f/UzMysIW068JFGYRYD2yHAh6QMKdsBA0gpyNYpt7OkA0nZWfaJiA+Bc4FvRMR2pPRlkBaM3VXSaqQ5fF/N5f2BJ4G5wIERsQOwB/DfSvoBBwHbA98Big9QBwE/iYi+pAwyVxTeWycfe1/govouvJi5ZfbsefVVMzOzJmrTg1siYqyktSStC/QAppOC3i0RMR+YJukJYEdSFpaiPUjBaK+IKGVQGQFcL+l2FmVSeRI4BXgdeAD4es7B2TMiXpa0InChpN1Ik+LXA9YmBa97IuJfAJLuy7+7ALsAd0gqteULhXbdnbPMvKi0Ant91z6IFEDZeOPVPPTWzKxK2nTgy4YAA4EvknqAvSrc7zVgE2Az8goIEXGCpC+TMraMk9SHNFevX67/CNAd+CEptRjAkaSg2zci5kmaSsrmIspbDpgREX3qeb+Yaaa+Y5iZWTNp67c6IQW7w0jBbwjp1uSheSWEHsBuwKgy+71BugV5g6StID17i4hnI+Jc0i3TDSLi38CbpNuoI0k9wNPzb4CuwPs56O0BbJTLnwK+LalT7uV9CyD3Ll+XdHA+pyRtV8XPw8zMlkGb7/FFxCRJqwJvR8S7ku4CdgZeIOXKPDMi3issHVTc92VJR5JuO36b9DxwU1JPa1g+BqQg9x8R8amkJ4H1WRT4bgbukzQaGAdMzsd+TtK9+RhvkHqVM/M+RwJXSjoHWJEUvEvnarI119zMc73MzKrEmVuWgaQuETEnPxMcDhwfEc9X+zzO3GJm1nTO3FIFks4D5kTEpblokKQtSc/8BjdH0AOY9vEULvvrN5rj0NYG/OyIh1q7CWY1xYFvGUTEEZXWVRriqTyi08zMWkl7GNzSrCR1lvRAntQ+UdKhkqbmJYaQ1E/S44VdtpP0qKQpkn6Y65Sd5J7TqL0k6QrgeWADSXMK5x4o6foWu1gzM3OPD9gbeCcivgWgtEzRxQ3U3xb4CtAZGCvpAeB90iT3WTlgjswDXwA2B46NiJPy8ZvpMszMrBI13+MDJgADcnq0XSNiZiP174mIf+VMMI8BO5FGiV4oaTzwDxZNcgd4IyJGNrVRxcwtn8z+d1N3NzOzetR8jy8iXpHUF9gH+K2kh0mpy0r/KehUd5cyr+ub5A7wSQP71z12sV0LM7dssElXD701M6uSmu/x5XRon0bETcClwA7AVKBvrnJQnV32z5PW1wR2J2V+qW+SeznTJH1JaZX2A6t3JWZmVoma7/EB25Amti8A5gEnAisDf5F0NvBsnfqjSDk9NwQuiIh3JJWd5F6Ps4D7SdliJgJdqnkxZmbWME9gbwc8gd3MrOnqm8Be87c6zcystvhWZyuSdHZEXNhYvTenT+G0oXu3RJOsin5/0IOt3QQzK8M9vtZ1dms3wMys1tR84MvZVSZLGixpvKQhklaR1FfSE5LGSHqotMq7pMfz6utI6p6nLpD3uT0f4zZJzxbqHZ4zukyUdHEuuwhYWdK4PDjGzMxaQM0HvmxzYFBEbAvMAk4G/gcYGBF9gWuB3zRyjJOA6fkYF5Cn
Q+TpEhcDe5JWj99R0gERcRbwr4joExFH1j1YcQL7v2Z5AruZWbU48CVvRsSIvH0T8A1ga+ARSeOAc0hr9DWkP2ndPSJiIjA+l+8IPB4RH0TE56T1/XZrrEERMSgi+kVEv5VXW6nJF2RmZuV5cEtSd07HbGBSROxcpm59WV3qS8Lp5JxmZm2Ie3zJhpJKQe5wYCTQo1QmaUVJW+X3p7Ioq8vAwjGeAg7J9bckTYyHNAH+a/l54PL5+E/k9+ZJWrEZrsfMzOrhHl/yEnC0pD8DU0jP9x4C/pRXa1gB+AMwiZTW7HZJRwGPFo5xBTA4J6oeS7rVOTMi3pX0/0gJrQX8LSLuyfsMAsZLer7cc76SDVbf1EPjzcyqpOYzt0jqCdwfEVsv43GWB1aMiLmSegHDgM0iYplHpjhzi5lZ09WXucU9vupZBXgs37oUcGI1gh7AlBlv8M17TqjGoawF/X3/q1q7CWZWRs0HvoiYShrBuazHmS1pIFXoPZqZWfPx4BYzM6spDnyNyCuzn1R4fZ6k/5R0Sc7EMkHSoWX2e7YwErSU8aWvpDUk3Z0zvIyUtG1LXYuZmTnwVeJWoBjYDgE+JGVh2Q4YQFrPb50y+5WmN6wDrBsRY4DzgbE5w8vZwA3lTlrM3PLvWXOreT1mZjXNga8RETEWWEvSupK2A6aTgt4tETE/IqaR5uXtWGfX24GD8/YhwB15uz9wYz72o8CaecpE3fMuzNyy0mqd6r5tZmZLqeYHt1RoCGmy+hdJPbleje0QEW9L+ijfyjwU+FF+q1wml9qeU2Jm1oLc46vMrcBhpOA3BBgOHCppeUk9SLk3R9Wz35lA14iYkMuGA0cCSNod+DAiZjVv883MrMQ9vgpExCRJqwJv50wsdwE7Ay+QemtnRsR7eTJ80RDgj6TVGkrOA67LGV4+BY5u7PybdtvIc8LMzKqk5jO3tAfO3GJm1nTO3NKOTZnxHvvcdVFrN6PD+9uBZ7V2E8ysBfgZXwPynL3TK6zbU9LE5m6TmZktGwc+MzOrKQ58dUj6haSXJf0D2DyX9clZVsZLukvS6rm8r6QXJD0DnFw4xiqSbs/1b8tZXPrl9/aS9Iyk5yXdIalLa1ynmVmtcuArkNSXNG1he+A7LJqUfgPw85xtZQLwq1x+HXBKmZXaTwKm5/oXkBeuldQdOAcYEBE7AKOB0+ppSyFzyyfVukQzs5rnwLe4XYG7IuLTPLfuXqAz0C0iSqumDwZ2y9lWiuU3Fo7TnzSHj4iYSFqUFuArwJbACEnjSFMZNirXkMUzt3Su3hWamdU4j+pcUqXzO9RA3XLZWUrlj0TE4U1ulZmZVYV7fIsbDhwoaeU8Yf3bwCfAdEm75jpHAU9ExAxgpqT+ufzIwnGeYlGC6i2BbXL5SOCrknrn91aRtFmzXpGZmS2mST0+SZ0josM+cIqI5yXdBowD3gCezG8dDVwlaRXgNeDYXH4scK2kT4GHCoe6Ahics7OMJd3qnBkRH0g6BrhF0hdy3XOAVxpq16bdvug5ZmZmVVJR5hZJuwDXAF0iYsO8SsGPIuKkRnatSZKWB1aMiLmSegHDgM0i4t9LczxnbjEza7plzdxyGfAN0mAPIuIFSbtVsX0dzSrAY5JWJD3XO3Fpgx7AlOkf8K2hf65a42rNAwf9qPFKZlYzKr7VGRFvSouN2Zhf/ea0bTkJ9f0RsXVD9SJiNrDE/zLMzKz1VRr43sy3O0PSSsApwEvN1ywzM7PmUemozhNImUnWA94irUB+coN7dFwrSBqcs7IMySMzp0q6MGdkGS1pB0kPSXpV0gkAkq6QtF/evkvStXn7+5L+qzUvyMysllQa+BZExJERsXZErBUR3wVWa86GtWGbA4NyVpZZpCwtAG/mDC5PAteTFq39CvDr/P5w0gR5SP+B2DJv92fR6NGFFs/cMqc5rsPMrCZVGvjuk7Qw0En6EnBf8zSpzXszIkbk7ZtIgQvywB9SSrNnI2J2RHwAzJXU
jRTcds3z+l4Epklah7Sg7dN1T7J45han8zQzq5ZKA9+FpODXJeezHAJ8t/ma1abVnf9Rev1Z/r2gsF16vUJEvA2sDuxN6v09SZrkPicPhjEzsxZQ0eCWiHggD81/GFgVOCAipjRry9quDSXtHBHPAIeTsrRsX+G+zwCnAnsCa5L+AzGkWVppZmZlNRj4JP0Pi/dwViNlLvmJJCLilOZsXBv1EnC0pD8DU4ArgZ9UuO+TwF4R8U9JbwBrUOb5Xl2brt7Dc9HMzKqkwcwtko5uaOeIGFz1FtkSnLnFzKzplipziwNb2/DP6R+z75CbW7sZ7db9A49svJKZ1YyKBrdI2jTPWXtR0muln+ZuXLVIWub5AJJ2l3R/NdpjZmatp9JRndeRnmV9DuxBWpH8xgb3qBJJXjPQzMyqptLAt3JEDCM9E3wjIs4jjUysl6SekiZLukbSREk3SxogaYSkKZJ2ktRZ0rWSnpM0VtL+ed9jJN0h6T7SSFIknSlpgqQXJF2Uy3pJelDSGElPStoil2+cs6g8J+mCOu06I5ePl3R+Luss6YF87ImSDq3nslbLWVdelHSVpOXy/nvl8z2f290ll0+VdH4un1Bo398kjcs/Mxt7lmpmZtVTaW9qbv4jP0XSj4G3gbUq2K83cDBwPPAccARpwvd+wNmkidyPRsRxeZL3KEn/yPvuDGwbER9L+iZwAPDliPhU0hq5ziDghIiYIunLpHXw9gT+CFwZETdIWphaTdJewKbATqRVE+7Nq0z0AN6JiG/lel3ruZ6dSBlX3gAeBL4j6XHSmnoDIuITST8HTmNRxpYPI2IHSScBpwM/iIh98nn6knrTd9c9kaTj8+fGyt3XbPhTNjOzilUa+E4lLbVzCnAB6Xbn9yrY7/WImAAgaRIwLCJC0gSgJ7A+sJ+k03P9TsCGefuRiPg4bw8ArouITwFyMOwC7ALcoUWrRpQWd/0qcFDevhG4OG/vlX/G5tddSIHwSeBSSReTVl+ob4rBqIh4LV/PLaQgPpcUDEfkdqxEmq9Xcmf+PQb4TqlQUvfctkMiYmbdE0XEIFJgp1uvTRpfNNHMzCpSaeAL0h/pjYAVc9nVwLaN7Fc3g0kxu8kKpKWNDoqIl4s75d5bcaV3sWTGlOWAGRHRp4E21yXgtxGxxOJ2ufe1D/BbSQ+TVlQv1TuXlJezXNYWkYL04fW0o3TN88mft9JCtbcCv46IifXsZ2ZmzaDSZ3w3k27JHQTsm3++XYXzP0SaDC8ASfVlQHkYOE7SKrneGhExC3hd0sG5TEorwwOMAA7L28Wx7A/l45Sewa0naS1J6wKfRsRNwKXADhHxbET0yT+lPJw75eeHywGHkrK2jAS+Kql3PuYqkjZr5LovAsZHxK2NfkJmZlZVlfb4Pij88a+mC4A/AONz8JtKCqqLiYgHJfUBRkv6N/A30jPCI4ErJZ1D6oneCrwA/BT4q6SfAkMLx3lYKcH2MznWziHlHO0NXCJpATAPOLGe9j5DClrbkPJt3hURCyQdA9wiqXSr9RzglQau+3RgkqRx+fW5DX2+vVdfw3PRzMyqpMHMLQsrSf9Byks5jMLty4i4s96drGqcucXMrOm0NJlbCo4FtiD1qhbksmDRwA1rRv+cPoP9htzT2s1oc+4duH9rN8HM2qFKA992EbFNs7akhkiaExFd8rPFP0XEwNZuk5lZrah0cMtIpQVUrYoi4h0HPTOzllVp4OsPjJP0cs54MkHS+OZsWFsn6e6cMWZSnmy+WE5QSQMlXZ+3y2aSUcpu4+kMZmYtqNJbnXs3ayvap+PyRPqVgeckDW2gbtlMMg1ZPHNLj2VvrZmZAZWvwP5GczekHTpF0oF5ewNSBpj61JdJpru+qCwAABaZSURBVF6LZ27p7cwtZmZV4pUPloKk3Ulp1HbOuUMfJ6VbKwaoTnV2c/AyM2sDKn3GZ4vrCkzPQW8L4Cu5fJqkL+XMLgcW6teXScbMzFqYe3xL50HghDzA
52VS2jKAs4D7gTeBiaQk2FBPJplK9V69m+esmZlVSUWZW6x1OXOLmVnTLWvmFmtFr06fzYFDH2/tZrQ5dx20e2s3wczaoZp5xifpcUlLRP4WOG+3vAht6fXuku5v6XaYmVlSM4GvFXUDTmq0lpmZtYgOF/hyNpTJkgbnLDNDSuv45fe/L+mywusfSvp9Yb9rJE2UdLOkAZJGSJoiaadcf42ctWW8pJGSts3l5xVWkicfoydpGaNeksZJuiS/3SW3a3I+z8Il5M3MrHl1uMCXbQ4MiohtSSunF3tctwL7SSqtJH8saZFdSOvy/ZG0svwWwBGkdG2nk9b/AzgfGJuPfTZwQyNtOQt4NS9oe0Yu2x44FdgS2IQ0wX0xko6XNFrS6M9mzazsqs3MrFEdNfC9GREj8vZNpOAFQER8AjwK7Jvn4K0YERPy269HxISIWABMAoZFGvY6AeiZ6/QnZV8hIh4F1pTUtYntGxURb+XzjCsce6GIGBQR/SKi3xdWa+rhzcysPh11VGfdORp1X19D6q1NZlFvDwqL7JLWHfyssF36rMrdlgzgcxb/j0TdzC1FxfPMp+N+D2ZmbU5H7fFtKGnnvH048FTxzYh4lpRf8wjgliYeezg5+0pOXfZhRMwCpgI75PIdgI1z/dnAqk2+AjMzaxYdtafxEnC0pD8DU4ArgW/XqXM70Ccipjfx2OcB1+WsLZ8CR+fyocD3JI0DngNeAYiIj/IAmYnA34EHmnoxvVZf1XPWzMyqpMNlbskjKe+PiK0bqXc/cFlEDGuJdi0LZ24xM2s6Z27JJHUDRgEvtIegB/Da9H9x8NDaWa/2joMa/D+Lmdky6XCBLyKmAvX+5YyIGcBmLdagOvKq7PdHxJDWaoOZWS3rqINbzMzMynLgawJJnSU9IOmFnJnlUEnnSnouvx5UysKSM8I8l+sOLWaPAQZIelLSK5L2baXLMTOrSQ58TbM38E5EbJcHzzwIXB4RO+bXKwOlQHZnLt+ONMr0+4Xj9AS+BnwLuErSEnP+Fs/c0tSBp2ZmVh8HvqaZQOqtXSxp14iYCewh6VlJE4A9ga1y3a1zr24Cad7fVoXj3B4RCyJiCvAaKT3aYhbP3LJ6816VmVkN6XCDW5pTRLwiqS+wD/BbSQ8DJwP9IuJNSeexKGPL9cABEfGCpGOA3YuHqnvo5my3mZkt4h5fE0haF/g0Im4CLiVnagE+lNQFGFiovirwbk6GfWSdQx0saTlJvUhJql9u5qabmVnmHl/TbANcImkBMA84ETiAdAt0KiljS8kvgWeBN/L7xbRlLwNPAGsDJ0TE3IZOusnqK3tum5lZlXS4zC0dkTO3mJk1nTO3tDBJcyKiSzWO9e6MeVxw1zvVOFSr+uWB67Z2E8zM/IyvUpKWb+02mJnZsqupwCepp6TJkgZLGi9piKRVJP2HpLGSJki6VtIXcv2peYL6U6QBKadIejHve2uu00XSdXnf8ZIOKpzvN3kC+0hJa+eyHnlC+3P5Z4nV183MrPnUVODLNgcGRcS2wCzgNNLUg0MjYhvS7d8TC/XnRkT/iLgVOAvYPu97Qn7/l8DMiNgmlz+ayzsDI/ME9uHAD3P5H0mrQuwIHERaFNfMzFpILQa+NyNiRN6+CfgP4PWIeCWXDQZ2K9S/rbA9HrhZ0ndJK64DDAD+t1ShsL7fv4H78/YYUraWUv3L87p99wKrSVpiodpi5pZPZn3U9Ks0M7OyajHwNXUY6yeF7W+RglxfYIykFQDVc8x5sWjI7HwWDSRaDtg5Ivrkn/UiYvYSjSxkbum82ppNbLKZmdWnFgPfhpJ2ztuHA/8AekrqncuOIs2xW4yk5YANIuIx4EygG9AFeBj4caFeY/nF6tbvs5TXYWZmS6EWA99LwNGSxgNrAJcBxwJ35LyaC4Cryuy3PHBTrjOW9JxuBvBfwOp5dYYXgD0aOf8pQL88EOZFFj0rNDOzFlBTE9gl9SQtAtuu0qB4AruZWdN5Ans7
Nn3659w+9MPWbka9Djmoe2s3wcysYjV1qzMipi5Nb0/S083RHjMza3k1FfiWVkTs0tptMDOz6nDgq4CkOfn3OpKGSxqXB7PsWnw/bw+UdH3eHlf4+Zekr0k6T9LphfoT87NHMzNrAQ58TXME8FBE9AG2A8Y1VLk0V4+U3WU0UPEt0+IE9lmewG5mVjUe3NI0zwHX5sVl746IBgMfgKRNgUuAPSNinqSKThQRg4BBAL169amdobdmZs3MPb4miIjhpHRmbwM3Svpe6a1CtU6lDUmdgduBH0ZEaV2hz1n8c++EmZm1GAe+JpC0EfB+RFwN/AXYIb81TdKXcnaXAwu7XAdcFxFPFsqmlvaTtAOwcbM33MzMFvKtzqbZHThD0jxgDlDq8Z1FSkj9JjAR6JKD5EBgM0nH5Xo/AIYC38tJqp8DXqERq6++gufKmZlVSU1lbmmvnLnFzKzpnLmlHZvz0ecMv/GDFj/vbkf1aPFzmpk1t3b7jC+vjt49b58i6SVJN7d2u8zMrG3rKD2+k4BvRsTrS7OzpBUi4vPGay4bSctHxPzmPo+ZmdWvXfT4JN0taYykSZKOr/PeVcAmwL2SfiZpJ0lPSxqbf29ezzEfl3ShpCeAn0raSNKwvFzQMEkb5nrXS7pS0mOSXsvZV67NPczrC8fbS9Izkp6XdIekLrl8qqRzJT0FHCypj6SR+Tx3VbB+n5mZVVG7CHzAcRHRF+gHnCJp4ZLkEXEC8A6wR0RcBkwGdouI7YFzgQsbOG63iPhaRPw3cDlwQ0RsC9wM/KlQb3VgT+BnwH2kNfy2ArbJgaw7cA4wICJ2IGVpOa2w/9yI6B8RtwI3AD/P55kA/Kpcw4qZW2bMduYWM7NqaS+3Ok+RVJoftwGwaQN1uwKDc8aUAFZsoO5the2dge/k7RuB3xXeuy8iIi9COy0iJgBImgT0BNYHtgRG5MwsKwHP1D2PpK6kYFta4X0wcEe5hhUzt2yxsTO3mJlVS5sPfJJ2BwYAO0fEp5Iep+FsJxcAj0XEgTn58+P5ONcB2wPvRMQ+ue4nDRynGGw+y78XFLZLr1cA5gOPRMTh9RyrofOYmVkLag+3OrsC03PQ2wL4SgX1387bx5QKI+LYnDR6n7J7pQTSh+XtI4GnmtDGkcBXJfUGkLSKpM3qVoqImcD00qoOwFHAE3XrmZlZ82nzPT7gQeAESeOBl0lBpiG/I93qPA14tAnnOYWUgPoM4APg2Ep3jIgPJB0D3CLpC7n4HMpnZTkauErSKsBrlZyny5oreE6dmVmVOHNLO+DMLWZmTefMLe3Y3A/mMfmKaS1+3i1OWrvFz2lm1tzawzO+ZZbn4g1s4XPOaeT9bpJOaqn2mJlZUhOBrzlIWn4ZD9GNlHHGzMxaUIcMfJK+lzOjvCDpxly8W87k8lqx9yfpDEnP5frn57KLi70xSedJ+k9Ju+cMLn8lTT5H0mmSJuafU+tpzxLnAC4CekkaJ+mS5vgczMxsSR3uGZ+krYBfAF+NiA8lrQH8HlgH6A9sAdwLDJG0F2ky/E6ASGnPdgNuBf4AXJEPewiwNyk12k7A1hHxuqS+pFGZX877PyvpiYgYW2hPfec4Kx+nTz3XcTxwPMC6a6xflc/GzMw6Zo9vT2BIRHwIEBEf5/K7I2JBRLwIlEZt7JV/xgLPk4LipjlwrSVpXUnbkeYR/l/eZ1QhGXZ/4K6I+CQi5gB3AqU5eiVlz9HYRUTEoIjoFxH9Vu+yRlM/AzMzq0eH6/GRelXl5mh8VqdO6fdvI+LPZeoPIa2g/kVSD7CkmIVFNK7sOXJWGTMza2Edscc3DDiklMg63+qsz0PAcYWVFNaTtFZ+71ZSJpeBpCBYznDggJyppTNwIPBkheeYDaza5KszM7Nl0uF6fBExSdJvgCckzSfdYqyv7sOSvgQ8k5NLzwG+C7yfj7Mq8HZEvFvP/s/npYlG5aJris/3GjpHRLwqaYSkicDfI+KM+trZqceKnlNnZlYl
ztzSDjhzi5lZ0zlzSzs2b9pnvHfpay1+3i+evkmLn9PMrLl1xGd8y0TS063dBjMzaz4OfHVExC4tfc4qZIExM7MKOfDVIWmOpC6Shkl6XtIESfsX3v+lpMmSHpF0i6TTc/njkvrl7e6SpubtnpKezMd6XtIuuXyJLDBmZtb8/IyvvLnAgRExS1J3YKSke4G+wEGkldxXIE1IH9PIsd4Hvh4RcyVtCtwClB62LswCU3enYuaW9bqtW4VLMjMzcOCrj4ALc2qxBcB6pGwv/YF7IuJfAJLuq+BYKwKXS+oDzAeKK7OPKhf0IGVuAQYBbLfBNh56a2ZWJQ585R0J9AD6RsS8fNuyEw1navmcRbeOOxXKfwZMA7bL788tvFfMAmNmZi3Az/jK60qaxD5P0h7ARrn8KeDbkjrlTCzfKuwzlXQrFFK2l+Kx3o2IBcBRgAeymJm1Ivf4lhTAzcB9kkYD44DJABHxXH7W9wLwBjAamJn3uxS4XdJRwKOF410BDJV0MPAYS9HLW3HtL3hOnZlZlThzS0HO7/l8RGzUQJ0uETFH0iqkXJ3HR8TzzdkuZ24xM2s6Z25phKR1gcdJPbeGDJK0Jek53uDmDnoA86Z9wrQ/jGq8YoXWPnWnqh3LzKy9ceDLIuIdFh9xWV+9I5blPHmgTD/SYJgjIuKKhvcwM7Nq8uCWTElLfh7dgJNa8HxmZkaNB76cVeUlSVeQJqMfJemZnGHljsIaelMlnV/I5LJFLt9J0tOSxubfm+fyYyRdXjjP/ZJ2r3P6i4BeksZJuqRFLtjMzGo78GWbAzcAXwe+DwyIiB1IIzZPK9T7MJdfCZyeyyYDu0XE9sC5wIVNOO9ZwKsR0afcWnySjpc0WtLojz+Z0eSLMjOz8vyMD96IiJGS9gW2BEbkBWNXAp4p1Lsz/x4DfCdvdwUG51RkQcrSUhWLZ275kofemplViQPfonl1Ah6JiMPrqfdZ/j2fRZ/bBcBjEXGgpJ6kUaGweBYXWDyTi5mZtSLf6lxkJPBVSb0BJK0iqbFRnl2Bt/P2MYXyqUAfSctJ2oCUjLqu2cCqy9RiMzNrMvf4soj4QNIxwC2SvpCLzwFeaWC335FudZ7G4tlaRgCvk5YbmkgaOFP3fB9JGiFpIvD3cs/5SlZcu7Pn3pmZVYkzt7QDztxiZtZ0ztzSjn3+/izev/zBRuut9eO9W6A1Zmbtm5/xmZlZTXHgawJJy9RDXtb9zcxs2dVE4MsZWiZLukbSREk3SxqQB5dMyRlYOku6VtJzORPL/nnfY3IWl/uAh3PZmTmDywuSLsplj0vql7e755ycS+wv6cbSsfP7N0var4U/EjOzmlVLPZDewMHA8cBzwBFAf2A/4GzgReDRiDhOUjdglKR/5H13BraNiI8lfRM4APhyRHwqaY0Kzl3c/2ukVdnvkdQV2AU4uu4Oko7PbWX91dda6os2M7PF1USPL3s9IibkldAnAcMiDWmdAPQE9gLOkjSONBG9E7Bh3veRiPg4bw8ArouITwEK5Q1ZuH9EPAH0lrQWcDgwNCI+r7tDRAyKiH4R0W/NLl2X7orNzGwJtdTj+6ywvaDwegHpc5gPHBQRLxd3kvRlFl81XaT0ZHUVs7XUzdRSd9X1G4EjgcOA4ypsv5mZVUEt9fga8xDwE+VEnZK2r6few8BxeQV2Crc6pwJ98/bARs51PXAqQERMWvomm5lZU9VSj68xFwB/AMbn4DcV2LdupYh4UFIfYLSkfwN/Iz0jvBS4XdJRLJ7FZQkRMU3SS8DdlTRshbVW8xw9M7MqceaWVpB7ixOAHSJiZgX1ZwMvN1avHegOfNjajaiCjnAdHeEawNfR1rS169goInrULXSPr4VJGgBcC/y+kqCXvVwu7U57I2m0r6Nt6AjXAL6Otqa9XIcDXwuLiH+waLSomZm1MA9uMTOzmuLA1z4Mau0GVImvo+3oCNcAvo62pl1chwe3mJlZTXGPz8zMaooDn5mZ1RQHvjZM
0t6SXpb0T0lntXZ7lpakqXk1i3GS2s1S8nm1jvclTSyUrSHpkbyqxyOSVm/NNlainus4T9Lb+TsZJ2mf1mxjJSRtIOkxSS9JmiTpp7m83XwnDVxDu/o+JHWSNCqvUDNJ0vm5fGNJz+bv4jZJK7V2W8vxM742StLywCvA14G3SCtKHB4RL7Zqw5ZCXqKpX0S0pYmtjZK0GzAHuCEits5lvwM+joiL8n9GVo+In7dmOxtTz3WcB8yJiEtbs21NIWkdYJ2IeF7SqsAY0kopx9BOvpMGruEQ2tH3kbNbdY6IOZJWBJ4CfgqcBtwZEbdKugp4ISKubM22luMeX9u1E/DPiHgtIv4N3Ars38g+VkURMRyou/rG/sDgvD2Y9EerTavnOtqdiHg3Ip7P27OBl4D1aEffSQPX0K5EMie/XDH/BLAnMCSXt9nvwoGv7VoPeLPw+i3a4T+QLEiL8I7J6wy2Z2tHxLuQ/ogB7XmxxB9LGp9vhbbZ24PlSOoJbA88Szv9TupcA7Sz70PS8nkZt/eBR4BXgRmFZdba7N8sB762S2XK2ut96a9GxA7AN4GT8603a11XAr2APsC7wH+3bnMqJ6kLMBQ4NSJmtXZ7lkaZa2h330dEzI+IPsD6pDtUXypXrWVbVRkHvrbrLWCDwuv1gXdaqS3LJCLeyb/fB+4i/SNpr6bl5zSl5zXvt3J7lkpETMt/uBYAV9NOvpP8PGkocHNE3JmL29V3Uu4a2uv3ARARM0iLd38F6CaplAqzzf7NcuBru54DNs2jpFYiLVp7byu3qckkdc4P8ZHUmbTS/cSG92rT7gWOzttHA/e0YluWWilQZAfSDr6TPKDiL8BLEfH7wlvt5jup7xra2/chqYekbnl7ZWAA6XnlYyxaj7TNfhce1dmG5SHNfwCWB66NiN+0cpOaTNImpF4epKTof20v1yHpFmB30lIr04BfkdZQvJ2UaPz/gIMjok0PHKnnOnYn3VYL0tqTPyo9J2urJPUHniQt6bUgF59NekbWLr6TBq7hcNrR9yFpW9LgleVJHajbI+LX+d/7rcAawFjguxHxWeu1tDwHPjMzqym+1WlmZjXFgc/MzGqKA5+ZmdUUBz4zM6spDnxmZlZTHPjMrEVIOlXSKq3dDjNPZzCzFtFeV+mwjsc9PjNbSNL3cqLkFyTdKGkjScNy2TBJG+Z610saWNhvTv69u6THJQ2RNFnSzUpOAdYFHpP0WOtcnVmyQuNVzKwWSNoK+AUpqfiHktYgZee4ISIGSzoO+BONLzWzPbAVKU/jiHy8P0k6DdjDPT5rbe7xmVnJnsCQUmDKab92Bv6a378R6F/BcUZFxFs54fI4oGcztNVsqTnwmVmJaHwZmdL7n5P/fuTEyysV6hRzM87Hd5asjXHgM7OSYcAhktYEyLc6nyatDAJwJPBU3p4K9M3b+5NW4G7MbGDVajXWbGn5f2JmBkBETJL0G+AJSfNJ2fVPAa6VdAbwAXBsrn41cI+kUaSA+UkFpxgE/F3SuxGxR/WvwKwyns5gZmY1xbc6zcyspjjwmZlZTXHgMzOzmuLAZ2ZmNcWBz8zMaooDn5mZ1RQHPjMzqyn/H07D2Efbpg/uAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# By default SNS countplot will order the bars according to ascending order of the label, here makers\n", + "# Change the order to ascending/ descending order of size of bars use this pandas trick\n", + "## see Github issue for details https://github.com/mwaskom/seaborn/issues/1029#issuecomment-342365439\n", + "sns.countplot(y='make', data=df, order = df['make'].value_counts().index)\n", + "plt.title('Car makers in the dataset')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Same using Pandas, somewhat straight forward\n", + "df.make.value_counts(ascending=True).plot.barh(title='Car makers in the dataset')\n", + "plt.xlabel('count')\n", + "plt.ylabel('make')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: Top 6 car makers are Japanese" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 3.1: Do these Japanese car maker manufacture budget cars?" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# lets make a box-plot with while keeping the sorting order as per the countplot above\n", + "plt.figure(figsize=(10,6))\n", + "sns.boxplot(y='make',x='price',data=df, order=df['make'].value_counts().index)\n", + "plt.title('Car price distribution by car makers')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: Japanese makers are the ones making more of these budget cars" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 4: Plot a boxplot that shows the variability of each 'body-style' with respect to the 'price'.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnQAAAGTCAYAAAChwI9oAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de5xfVX3v/9e7IQIqctHUCkFDlR6jsVKlij/jOQRbBKyFVq3moKCmoh5M29MeBRpbxDoV2tPa46UqNhHwMoh4o4BFqkGbqkCUi8HRQw5iiaEay0WsiAE/vz/2GvkyTDKT62TPvJ6Px/cx+7v22nuv/d1zec/ae+2dqkKSJEn99QtT3QBJkiRtGwOdJElSzxnoJEmSes5AJ0mS1HMGOkmSpJ4z0EmSJPWcgU7SVktyTpK3TnU7ptrmPockr0iyajtt54okv7+d1jXlxy7Jm5N8aCrbIE0XBjppGkhyc5K7k/woye1JLkly4FS3a1CSSvKEqW6HdpztGV4lbRkDnTR9vKCqHg48Bvge8M4pbs8Ok46/vySp8ReiNM1U1U+AC4EnjZYl2TvJeUk2JPlOkjeNBqIk70ly4UDds5J8roWmw5OsS/KnSX7QegKP39S2k7w6ydoktyW5KMn+rfyLrcp1rRfxJeMsOyvJ37TtfDvJ61uv3m5t/hVJhpL8K/Bj4JeT7N+2c1vb7qsH1veAU4qj+zLw/uYkpyX5RuvV/ECSPQbm/1aSa5PckeRLSX51YN6vJflakruSfBT4+XKb/mjyziR3Jvlmkue2whcn+eqYin+S5FObWdfjk1zV1vXpJPsNLPvbSW5obb4iyfzJtDnJmiQvGHg/ux2HQzaxM69IclNb17eTHN+29V7gWe0Y35Hk15N8b/QYtmVfmOTaTaz3sPZZ35HkuiSHb+ZzkDTAQCdNM0keCrwE+MpA8TuBvYFfBv4bcALwyjbvT4BfbX+knwMsAU6s+58L+EvAo4ADgBOBs5P8l3G2ewTwNuD36HoJvwOcD1BV/7VVe2pVPbyqPjpO018NHA0cAjwNOG6cOi8HTgL2ausfBtYB+wMvAv5yNCxN0vHA84DHA78CvKnty9OAFcBrgEcC7
wMuSrJ7kocAnwI+COwHfAx44QTbeSZwE93neDrwiRbELgIOGgxewMvaujflBOBVdPt8L/CO1uZfofs8/giYA1wK/GOSh0yizee17Y46Bri1qh4UvJI8rG3z6KraC/j/gGuragR4LfDldoz3qaqrgf8AfnOi/UtyAHAJ8NbWxv8FfDzJnM18FpIaA500fXwqyR3AD+n+gP41dD1fdAHvtKq6q6puBv6GLhxRVT+m+yP7t8CHgKVVtW7Muv+squ6pqi/Q/dH9vXG2fzywoqq+VlX3AKfR9dbMm2T7fw/4P1W1rqpuB84cp845VXVDVd1LFzQXAqdU1U9a+PiH0f2apHdV1S1VdRswBCxu5a8G3ldVV1bVfVV1LnAPcFh7zQb+rqo2VtWFwNUTbOf7A/U/CnwLeH77nD5KC1NJngzMAy7ezLo+WFVrquo/gT8Dfm/gGF9SVZdX1UbgfwN70gWuidr8IeCYJI9o71/O5kPlz4AFSfasqlur6obN1D13YP/2owvQHxmn3suAS6vq0qr6WVVdDqymC5eSJmCgk6aP46pqH2B34PXAF5KM9q49hK5Ha9R36HrcAKiqq+h6kAJcMGa9t7fwMLjs/uNsf//BbVTVj+h6Zw4Yp+549gduGXh/yzh1Bsv2B26rqrvGtG2y2xu7vsH9ehzwJ+3U3x0tKB/Y5u8PfHegB3N02c0Zr/7ots4F/nuS0AWpC1rQm2ybZ9Md47Gf/89a3QMmanNVrQf+FXhhkn3oeko/DJDkve0U6o+S/Gn7XngJXW/crekG4DxxM+39EPCCJA+nC+3/UlW3jlPvccCLx3zmC+l6eyVNwEAnTTOtR+kTwH10fxB/AGyk+4M56rHAd0ffJDmZLgiuB944ZpX7ttNsg8uuH2fT6we30ZZ55OB2JnArMHfg/XijdAcDyXpgvyR7jWnb6Pb+E3jowLxfGmd9g9sY3K9bgKF22nD09dCqGm7tPKAFsMFlN2e8+usBquorwE+B5wD/nc33jI3X5o10x3js559W97uTbPNoT9qL6U6bfre177XtFOrDq+ovW9llVfWbdGHrm8D72zpqzDpp6/ky8DtsvufvFrrex8HP/GFVNV5PraQxDHTSNJPOscC+wEhV3UfX6zaUZK8kjwP+mK7nZPTaq7fS/TF/OfDGcS6GP6Ndi/Uc4LforsEa6yPAK5MckmR34C+BK9spXuhG3v7yZpp+AfCHSQ5ovUSnbG4/q+oW4EvA25Ls0QYtLKH1LAHX0p1G3K/1VP7ROKs5OcncdirwT+lOf0IXUF6b5Jnt83xYkue38PhlumvX/iDJbkl+F3jG5toK/GKrPzvJi4H5dNe4jToPeBdwb1VNdNuPlyV5UrtW8i3AhQPH+PlJnptkNt21kfe0z2gybf4U3bWLf9jaM64kj043+OJhbf0/ovvnAbpjPLddszfoPLp/FJ4CfHITqx7tyXteugEye6QbyDJ3E/UlDTDQSdPHPyb5Ed01dEN0AxtGr21aStdjdROwii58rWijDz8EnFVV11XVjXTB5oMtlAH8O3A7XQ/Qh4HXVtU3x268qj5Hd03Xx+l6hB4PvHSgypuBc9vptPGuwXs/8FngeuAausBzL/eHhfEsprvmbD1dUDi9XXsFXU/QdcDNbb3jDcT4SJt3U3u9te3Larrr6N7V9n0t8Io276fA77b3t9OdfvzEZtoIcCVwMF1P2hDwoqr6j4H5HwQWMHHv3Gjdc+iOyx7AH7R2fYsulL+zbecFdLey+elk2lxVd9Mdu4Mm2J9foAuL64Hb6AbZ/I827/PADcC/J/nBwDKfpOs9/OSY0/eD278FOJbu+28DXY/dG/DvlDQpeeAlFZJ0v3bbiA9V1U7vJUlyNPDeqnrchJW3bv03A79fVf+8I9a/hW3Zk27gxNNaqJ6qdvw58CtV9bIJK2/5uv8f8Jpd4fOWpiP/85G0S0iyZ5Jj2inBA+hu77Gp03PTzeuAq6c4zO1Hd8r67B2w7hfSX
V/3+e29bkmd3SauIkk7RYAz6E6N3k13e5Q/n9IW7QStpzCMf9+9ndWGVwN/Rzco4YsT1d/CdV9Bd5Prl7eRt5J2AE+5SpIk9ZynXCVJknrOQCdJktRzBjpJkqSeM9BJkiT1nIFOkiSp5wx0kjQJSZ6T5FtT3Q5JGo+3LZEkSeo5e+gkaQLtmbeStMsy0EmasZLcnOS0JN9IcnuSDyTZI8nhSdYlOSXJvwMfGC0bWPbAJJ9IsiHJfyR518C8VyUZaeu8LMkOeR6tJI0y0Ema6Y4Hngc8HvgV4E2t/JeA/YDHAScNLpBkFnAx8B1gHnAAcH6bdxzwp8DvAnOAfwGGd/A+SJrhDHSSZrp3VdUtVXUbMAQsbuU/A06vqnuq6u4xyzwD2B94Q1X9Z1X9pKpWtXmvAd5WVSNVdS/wl8Ah9tJJ2pEMdJJmulsGpr9DF9QANlTVTzaxzIHAd1pgG+txwP9JckeSO4DbgND14knSDuGFvpJmugMHph8LrG/Tm7sFwC3AY5PsNk6ouwUYqqoPb8c2StJm2UMnaaY7OcncJPvRXfv20UkscxVwK3Bmkoe1gRTPbvPeC5yW5MkASfZO8uId0nJJagx0kma6jwCfBW5qr7dOtEBV3Qe8AHgC8G/AOuAlbd4ngbOA85P8EFgDHL1DWi5JjTcWljRjJbkZ+P2q+uepboskbQt76CRJknrOQCdJktRznnKVJEnqOXvoJEmSeq6396F71KMeVfPmzZvqZkiSJO00X/3qV39QVXPGlvc20M2bN4/Vq1dPdTMkSZJ2miTfGa/cU66SJEk9Z6CTJEnqOQOdJElSzxnoJEmSes5AJ0mS1HMGOkmSpJ4z0EmSJPWcgU6SJKnnDHSSJEk9Z6CTJEnqOQOdJElSzxnoJEmSem7SgS7JrCTXJLm4vT8nybeTXNteh7TyJHlHkrVJrk/ytIF1nJjkxvY6caD86Um+3pZ5R5Jsz52UJEmaznbbgrp/CIwAjxgoe0NVXTim3tHAwe31TOA9wDOT7AecDhwKFPDVJBdV1e2tzknAV4BLgaOAz2z57kg71lT9r1FVU7JdSVI/TKqHLslc4PnAP0yi+rHAedX5CrBPkscAzwMur6rbWoi7HDiqzXtEVX25ur9a5wHHbc3OSDtaVW3163GnXLzVy0qStDmTPeX6d8AbgZ+NKR9qp1XfnmT3VnYAcMtAnXWtbHPl68Ypf5AkJyVZnWT1hg0bJtl0SZKk6W3CQJfkt4DvV9VXx8w6DXgi8OvAfsApo4uMs5raivIHF1adXVWHVtWhc+bMmajpkiRJM8JkeuieDfx2kpuB84Ejknyoqm5tp1XvAT4APKPVXwccOLD8XGD9BOVzxymXJEnSJEwY6KrqtKqaW1XzgJcCn6+ql7Vr32gjUo8D1rRFLgJOaKNdDwPurKpbgcuAI5Psm2Rf4EjgsjbvriSHtXWdAHx6O++nJEnStLUlo1zH+nCSOXSnTK8FXtvKLwWOAdYCPwZeCVBVtyX5C+DqVu8tVXVbm34dcA6wJ93oVke4SpIkTdIWBbqqugK4ok0fsYk6BZy8iXkrgBXjlK8GFmxJWyRJktTxSRGSJEk9Z6CTJEnqOQOdJElSzxnoJEmSes5AJ0mS1HMGOkmSpJ4z0EmSJPWcgU6SJKnnDHSSJEk9Z6CTJEnqOQOdJElSzxnoJEmSes5AJ0mS1HMGOkmSpJ4z0EmSJPWcgU6SJKnnDHSSJEk9Z6CTJEnqOQOdJGnGGR4eZsGCBcyaNYsFCxYwPDw81U2StsluU90ASZJ2puHhYZYtW8by5ctZuHAhq1atYsmSJQAsXrx4ilsnbR176CRJM8rQ0BDLly9n0aJFzJ49m0WLFrF8+XKGhoamumnSVjPQSZJmlJGREdatW/eAU67r1q1jZGRkqpsmbTVPuUqSZpT999+fU045hQ9/+MM/P+V6/PHHs//++09106StZg+dJGnGqarNvpf6xkAnSZpR1q9fz1/91V+xdOlS9
thjD5YuXcpf/dVfsX79+qlumrTVPOUqSZpR5s+fz9y5c1mzZs3Py1auXMn8+fOnsFXStrGHTpI0oyxbtowlS5awcuVKNm7cyMqVK1myZAnLli2b6qZJW80eOknSjDJ6r7mlS5cyMjLC/PnzGRoa8h506rVJ99AlmZXkmiQXt/cHJbkyyY1JPprkIa189/Z+bZs/b2Adp7XybyV53kD5Ua1sbZJTt9/uSZL0YIsXL2bNmjXcd999rFmzxjCn3tuSU65/CAzepOcs4O1VdTBwO7CklS8Bbq+qJwBvb/VI8iTgpcCTgaOAv28hcRbwbuBo4EnA4lZXkiRJkzCpQJdkLvB84B/a+wBHABe2KucCx7XpY9t72vzntvrHAudX1T1V9W1gLfCM9lpbVTdV1U+B81tdSZIkTcJke+j+Dngj8LP2/pHAHVV1b3u/DjigTR8A3ALQ5t/Z6v+8fMwymyp/kCQnJVmdZPWGDRsm2XRJkqTpbcJAl+S3gO9X1VcHi8epWhPM29LyBxdWnV1Vh1bVoXPmzNlMqyVJkmaOyYxyfTbw20mOAfYAHkHXY7dPkt1aL9xcYPSOjOuAA4F1SXYD9gZuGygfNbjMpsolSZI0gQl76KrqtKqaW1Xz6AY1fL6qjgdWAi9q1U4EPt2mL2rvafM/X90zVS4CXtpGwR4EHAxcBVwNHNxGzT6kbeOi7bJ3kiRJM8C23IfuFOD8JG8FrgGWt/LlwAeTrKXrmXspQFXdkOQC4BvAvcDJVXUfQJLXA5cBs4AVVXXDNrRLkiRpRtmiQFdVVwBXtOmb6Eaojq3zE+DFm1h+CBgap/xS4NItaYskSZI6PvpLkiSp5wx0kiRJPWegkyRJ6jkDnSRJUs8Z6CRJknrOQCdJktRzBjpJkqSeM9BJkiT1nIFOkiSp5wx0kiRJPWegkyRJ6jkDnSRJUs8Z6CRJknrOQCdJktRzBjpJ0owzPDzMggULmDVrFgsWLGB4eHiqmyRtk92mugGSJO1Mw8PDLFu2jOXLl7Nw4UJWrVrFkiVLAFi8ePEUt07aOvbQ7QRJpuQlSXqwoaEhli9fzqJFi5g9ezaLFi1i+fLlDA0NTXXTpK1moNsJqmqrXo875eKtXraqpnq3JWmXNDIywsKFCx9QtnDhQkZGRqaoRdK2M9BJkmaU+fPns2rVqgeUrVq1ivnz509Ri6RtZ6CTJM0oy5YtY8mSJaxcuZKNGzeycuVKlixZwrJly6a6adJWc1CEJGlGWbx4MV/60pc4+uijueeee9h999159atf7YAI9Zo9dJKkGWV4eJhLLrmEz3zmM/z0pz/lM5/5DJdccom3LlGvGegkSTOKo1w1HRnoJEkziqNcNR0Z6CRJM8r8+fM544wzHvCkiDPOOMNRruo1A50kaUZZtGgRZ511Fq961au46667eNWrXsVZZ53FokWLprpp0lYz0EmSZpSVK1dyyimnsGLFCvbaay9WrFjBKaecwsqVK6e6adJW87YlkqQZZWRkhGuuuYa3vvWtPy/buHEjb3vb26awVdK2mbCHLskeSa5Kcl2SG5Kc0crPSfLtJNe21yGtPEnekWRtkuuTPG1gXScmubG9Thwof3qSr7dl3hEfRCpJ2kF8UoSmo8mccr0HOKKqngocAhyV5LA27w1VdUh7XdvKjgYObq+TgPcAJNkPOB14JvAM4PQk+7Zl3tPqji531DbvmSRJ4/BJEZqOJjzlWt1T3n/U3s5ur809+f1Y4Ly23FeS7JPkMcDhwOVVdRtAksvpwuEVwCOq6sut/DzgOOAzW7VHkiRtxugTIZYuXcrIyAjz589naGjIJ0Wo1yY1KCLJrCTXAt+nC2VXtllD7bTq25Ps3soOAG4ZWHxdK9tc+bpxysdrx0lJVidZvWHDhsk0XZKkB1m8eDFr1qzhvvvuY82aNYY59d6kAl1V3VdVhwBzgWckWQCcBjwR+HVgP+CUVn28699qK8rHa8fZVXVoVR06Z86cyTRdkiRp2
tui25ZU1R3AFcBRVXVrde4BPkB3XRx0PWwHDiw2F1g/QfncccolSZI0CZMZ5TonyT5tek/gN4BvtuviaCNSjwPWtEUuAk5oo10PA+6sqluBy4Ajk+zbBkMcCVzW5t2V5LC2rhOAT2/f3ZQk6X7Dw8MPeFLE8PDwVDdJ2iaTuQ/dY4Bzk8yiC4AXVNXFST6fZA7dKdNrgde2+pcCxwBrgR8DrwSoqtuS/AVwdav3ltEBEsDrgHOAPekGQzggQpK0QwwPD7Ns2TKWL1/OwoULWbVqFUuWLAHwWjr11mRGuV4P/No45Udson4BJ29i3gpgxTjlq4EFE7VFkqRtNTQ0xPLly3/+qK9FixaxfPlyli5daqBTb/noL0nSjDIyMsLChQsfULZw4UJGRkamqEXStjPQSZJmFJ8UoenIQCdJmlF8UoSmo8kMipAkadrwSRGajgx0kqQZZ/HixQY4TSuecpUkSeo5A50kSVLPGegkSZJ6zkAnSZLUcw6K0Iz01DM+y513b9zp25136iU7bVt77zmb604/cqdtT5I0dQx0mpHuvHsjN5/5/Kluxg61M8OjJGlqecpVkiSp5wx0kiRJPWegkyRJ6jmvoZMk9V6Snb7Nqtrp25Q2xR46SVLvVdVWvR53ysVbvay0KzHQSZIk9ZyBTpIkqecMdJIkST1noJMkSeo5A50kSVLPGegkSZJ6zkAnSZLUcwY6SZKknjPQSZIk9ZyP/toCTz3js9x598adus15p16yU7e3956zue70I3fqNiVJ0rYx0G2BO+/eyM1nPn+qm7FD7ewAKUmStt2Ep1yT7JHkqiTXJbkhyRmt/KAkVya5MclHkzykle/e3q9t8+cNrOu0Vv6tJM8bKD+qla1Ncur2301JkqTpazLX0N0DHFFVTwUOAY5KchhwFvD2qjoYuB1Y0uovAW6vqicAb2/1SPIk4KXAk4GjgL9PMivJLODdwNHAk4DFra4kSZImYcJAV50ftbez26uAI4ALW/m5wHFt+tj2njb/uUnSys+vqnuq6tvAWuAZ7bW2qm6qqp8C57e6kiRJmoRJjXJtPWnXAt8HLgf+H3BHVd3bqqwDDmjTBwC3ALT5dwKPHCwfs8ymysdrx0lJVidZvWHDhsk0XZIkadqbVKCrqvuq6hBgLl2P2vzxqrWv2cS8LS0frx1nV9WhVXXonDlzJm64JEnSDLBF96GrqjuAK4DDgH2SjI6SnQusb9PrgAMB2vy9gdsGy8css6lySZIkTcJkRrnOSbJPm94T+A1gBFgJvKhVOxH4dJu+qL2nzf98VVUrf2kbBXsQcDBwFXA1cHAbNfsQuoETF22PnZMkSZoJJtND9xhgZZLr6cLX5VV1MXAK8MdJ1tJdI7e81V8OPLKV/zFwKkBV3QBcAHwD+Cfg5HYq917g9cBldEHxglZXknYpw8PDLFiwgFmzZrFgwQKGh4enukmSBEzixsJVdT3wa+OU30R3Pd3Y8p8AL97EuoaAoXHKLwUunUR7JWlKDA8Ps2zZMpYvX87ChQtZtWoVS5Z0d2tavHjxFLdO0kzns1wlaRKGhoZYvnw5ixYtYvbs2SxatIjly5czNPSg/1Elaacz0EnSJIyMjLBw4cIHlC1cuJCRkZEpapEk3c9AJ0mTMH/+fFatWvWAslWrVjF//nh3cZKknctAJ0mTsGzZMpYsWcLKlSvZuHEjK1euZMmSJSxbtmyqmyZJBjpJmozFixczNDTE0qVL2WOPPVi6dClDQ0MOiJCmmKPPOxOOcpUkdRYvXmyAk3Yhjj6/nz10kiSplxx9fj8DnSRJ6iVHn9/PQCdJknrJ0ef3M9BJkqRecvT5/RwUIUmSeml04MPSpUsZGRlh/vz5M3b0uYFOkiT1lqPPO55ylSRJ6jkDnSRJUs8Z6CRJknrOQCdJktRzBjpJkqSeM9BJkiT1nIFOkiSp5wx0kiRJPWegkyRJ6jkDnSRJUs8Z6CRJknrOQCdJktRzBjpJkqSeM9BJk
iT1nIFOkiSp5wx0kiRJPTdhoEtyYJKVSUaS3JDkD1v5m5N8N8m17XXMwDKnJVmb5FtJnjdQflQrW5vk1IHyg5JcmeTGJB9N8pDtvaOSJEnT1WR66O4F/qSq5gOHAScneVKb9/aqOqS9LgVo814KPBk4Cvj7JLOSzALeDRwNPAlYPLCes9q6DgZuB5Zsp/2TJEma9iYMdFV1a1V9rU3fBYwAB2xmkWOB86vqnqr6NrAWeEZ7ra2qm6rqp8D5wLFJAhwBXNiWPxc4bmt3SJIkaabZomvokswDfg24shW9Psn1SVYk2beVHQDcMrDYula2qfJHAndU1b1jysfb/klJVidZvWHDhi1puiRJ0rQ16UCX5OHAx4E/qqofAu8BHg8cAtwK/M1o1XEWr60of3Bh1dlVdWhVHTpnzpzJNl2SJGla220ylZLMpgtzH66qTwBU1fcG5r8fuLi9XQccOLD4XGB9mx6v/AfAPkl2a710g/UlSZI0gcmMcg2wHBipqr8dKH/MQLXfAda06YuAlybZPclBwMHAVcDVwMFtROtD6AZOXFRVBawEXtSWPxH49LbtliRJ0swxmR66ZwMvB76e5NpW9qd0o1QPoTs9ejPwGoCquiHJBcA36EbInlxV9wEkeT1wGTALWFFVN7T1nQKcn+StwDV0AVKSJEmTMGGgq6pVjH+d26WbWWYIGBqn/NLxlquqm+hGwUqSJGkL+aQISZKknjPQSZIk9ZyBTpIkqecMdJIkST1noJMkSeo5A50kTdLw8DALFixg1qxZLFiwgOHh4alukiQBk3xShCTNdMPDwyxbtozly5ezcOFCVq1axZIlSwBYvHjxFLdO0kxnD50kTcLQ0BDLly9n0aJFzJ49m0WLFrF8+XKGhh50y01J2ukMdJI0CSMjIyxcuPABZQsXLmRkZGSKWiRJ9zPQSdIkzJ8/n1WrVj2gbNWqVcyfP3+KWiRJ9zPQSdIkLFu2jCVLlrBy5Uo2btzIypUrWbJkCcuWLZvqpkmSgyIkaTJGBz4sXbqUkZER5s+fz9DQkAMiJO0S7KGTJEnqOXvoJGkSvG2JpF2ZPXSSNAnetkTSrsxAJ0mT4G1LJO3KDHSSNAnetkTSrsxAJ0mT4G1LJO3KHBQhSZPgbUsk7coMdJI0SYsXLzbASdolecpVkiSp5wx0kiRJPWegkyRJ6jkDnSRJUs8Z6CRJknrOQCdJktRzBjpJkqSemzDQJTkwycokI0luSPKHrXy/JJcnubF93beVJ8k7kqxNcn2Spw2s68RW/8YkJw6UPz3J19sy70iSHbGzkiRJ09FkeujuBf6kquYDhwEnJ3kScCrwuao6GPhcew9wNHBwe50EvAe6AAicDjwTeAZw+mgIbHVOGljuqG3fNUmSpJlhwkBXVbdW1dfa9F3ACHAAcCxwbqt2LnBcmz4WOK86XwH2SfIY4HnA5VV1W1XdDlwOHNXmPaKqvlxVBZw3sC5JkiRNYIuuoUsyD/g14Erg0VV1K3ShD/jFVu0A4JaBxda1ss2VrxunfLztn5RkdZLVGzZs2JKmS5IkTVuTDnRJHg58HPijqvrh5qqOU1ZbUf7gwqqzq+rQqjp0zpw5EzVZkiRpRphUoEsymy7MfbiqPtGKv9dOl9K+fr+VrwMOHFh8LrB+gvK545RL0g6TZEpekrQj7DZRhTbidDkwUlV/OzDrIuBE4Mz29dMD5a9Pcj7dAIg7q+rWJJcBfzkwEOJI4LSqui3JXUkOozuVewLwzu2wb9Im7TX/VJ5y7qkTV+yxveYDPH+qm7HL6i7Z3TrzTr2Em8/0s5W065gw0AHPBl4OfD3Jta3sT+mC3AVJlgD/Bry4zbsUOAZYC/wYeCVAC25/AVzd6r2lqm5r068DzgH2BD7TXtIOc9fImdP+D/K8Uy+Z6iZIknaSCQNdVa1i/OvcAJ47Tv0CTt7EulYAK8YpXw0smKgtkiRJejCfFCFJktRzBjpJkqSem8w1dEPYEDsAABRmS
URBVJIkSTvFVI0G35aBUrsCe+gkSdIuo6q26vW4Uy7e6mX7HubAQCdJktR7BjpJkqSeM9BJkiT1nIFOkiSp5xzlKknaJTz1jM9y590bd/p2d/ZTVfbeczbXnX7kTt2mpj8DnSRpl3Dn3Run/SP5wMfyacfwlKskSVLPGegkSZJ6zkAnSZLUcwY6SZKknjPQSZIk9ZyBTpIkqee8bckW2Gv+qTzl3FOnuhk71F7zAab/bQMkSZpODHRb4K6RM6f9PZK8P5IkSf3jKVdJkqSeM9BJkiT1nIFOkiSp5wx0kiRJPWegkyRJ6jkDnSRJUs8Z6CRJknrO+9BJkqTt6qlnfJY7796407e7s++luvees7nu9CN36jY3xUAnSZK2qzvv3jjtb8QPu9bN+D3lKkmS1HMTBrokK5J8P8magbI3J/lukmvb65iBeaclWZvkW0meN1B+VCtbm+TUgfKDklyZ5MYkH03ykO25g5IkSdPdZHrozgGOGqf87VV1SHtdCpDkScBLgSe3Zf4+yawks4B3A0cDTwIWt7oAZ7V1HQzcDizZlh2SJEmaaSYMdFX1ReC2Sa7vWOD8qrqnqr4NrAWe0V5rq+qmqvopcD5wbJIARwAXtuXPBY7bwn2QJEma0bblGrrXJ7m+nZLdt5UdANwyUGddK9tU+SOBO6rq3jHl40pyUpLVSVZv2LBhG5ouSZI0fWxtoHsP8HjgEOBW4G9aecapW1tRPq6qOruqDq2qQ+fMmbNlLZYkSZqmtuq2JVX1vdHpJO8HLm5v1wEHDlSdC6xv0+OV/wDYJ8lurZdusL4kSZImYat66JI8ZuDt7wCjI2AvAl6aZPckBwEHA1cBVwMHtxGtD6EbOHFRVRWwEnhRW/5E4NNb0yZJkqSZasIeuiTDwOHAo5KsA04HDk9yCN3p0ZuB1wBU1Q1JLgC+AdwLnFxV97X1vB64DJgFrKiqG9omTgHOT/JW4Bpg+XbbO0mSpBlgwkBXVYvHKd5k6KqqIWBonPJLgUvHKb+JbhRsL+xKd4XeEfbec/ZUN0GSJG0hH/21BXb2Y0zmnXrJjHh0iiRJ2jY++kuSJKnnDHSSJEk9Z6CTJEnqOQOdJElSzxnoJEmSes5AJ0mS1HMGOkmSpJ4z0EmSJPWcgU6SJKnnfFKEpF576hmf5c67N+707e7sxwDuvedsrjv9yJ26TUn9YaCT1Gt33r1xRjwib7o/R1rStvGUqyRJUs8Z6CRJknrOQCdJktRzBjpJkqSeM9BJkiT1nIFOkiSp57xtiSRJ2q72mn8qTzn31Kluxg6313yAXeO2SQa6nSDJ1i971tZvt6q2fmFJ2skMAdPHXSNnen/IncxAtxMYrCRpYoYAaet5DZ0kSVLPGegkSZJ6zkAnSZLUcwY6SZKknjPQSZIk9ZyBTpIkqecmDHRJViT5fpI1A2X7Jbk8yY3t676tPEnekWRtkuuTPG1gmRNb/RuTnDhQ/vQkX2/LvCPbctM2SZKkGWgyPXTnAEeNKTsV+FxVHQx8rr0HOBo4uL1OAt4DXQAETgeeCTwDOH00BLY6Jw0sN3ZbkiRJ2owJA11VfRG4bUzxscC5bfpc4LiB8vOq8xVgnySPAZ4HXF5Vt1XV7cDlwFFt3iOq6svV3X33vIF1SZIkaRK29hq6R1fVrQDt6y+28gOAWwbqrWtlmytfN075uJKclGR1ktUbNmzYyqZLkiRNL9t7UMR417/VVpSPq6rOrqpDq+rQOXPmbGUTJUmSppetDXTfa6dLaV+/38rXAQcO1JsLrJ+gfO445ZIkSZqkrQ10FwGjI1VPBD49UH5CG+16GHBnOyV7GXBkkn3bYIgjgcvavLuSHNZGt54wsC5JkiRNwm4TVUgyDBwOPCrJOrrRqmcCFyRZAvwb8OJW/VLgGGAt8GPglQBVdVuSvwCubvXeUlWjAy1eRzeSdk/gM+0lSZKkSZow0FXV4k3Meu44dQs4eRPrWQGsGKd8NbBgonZI29u8Uy+Z6
ibsUHvvOXuqmyBJ2kkmDHTSdHTzmc/f6ducd+olU7JdSdL056O/JEmSes4eOkm9ttf8U3nKuadOXLHn9poPYA+vpPEZ6CT12l0jZ86IU9nT/ZpPSdvGU66SJEk9Z6CTJEnqOQOdJElSz3kNnbQFugeabMPyZ23dct0tHiVJGp+BTtoCBitJ0q7IQCdJ2mXMhNG8M+UpLh7LnctAJ0naJfgEl+nDY7nzOShCkiSp5wx0kiRJPWegkyRJ6jkDnSRJUs8Z6CRJknrOQCdJktRz3rZEkiTtMrbliTxb+zQe6P+N4w10kiRpl9H3YDVVDHSSes870kua6Qx0knrNO9JLkoMiJEmSes9AJ0mS1HMGOkmSpJ4z0EmSJPWcgU6SJKnnDHSSJEk9521LJEm9NxVPF/AGuNqVbFMPXZKbk3w9ybVJVrey/ZJcnuTG9nXfVp4k70iyNsn1SZ42sJ4TW/0bk5y4bbskSZppqmqnv6RdyfY45bqoqg6pqkPb+1OBz1XVwcDn2nuAo4GD2+sk4D3QBUDgdOCZwDOA00dDoCRJkia2I065Hgsc3qbPBa4ATmnl51X3b81XkuyT5DGt7uVVdRtAksuBo4DhHdA2SQK27RQdeJpO0q5lWwNdAZ9NUsD7qups4NFVdStAVd2a5Bdb3QOAWwaWXdfKNlX+IElOouvd47GPfew2Nl3STGawkjSdbGuge3ZVrW+h7fIk39xM3fH+Ha7NlD+4sAuMZwMceuih/jaWJEliG6+hq6r17ev3gU/SXQP3vXYqlfb1+636OuDAgcXnAus3Uy5JkqRJ2OpAl+RhSfYanQaOBNYAFwGjI1VPBD7dpi8CTmijXQ8D7mynZi8DjkyybxsMcWQrkyRJ0iRsyynXRwOfbBcW7wZ8pKr+KcnVwAVJlgD/Bry41b8UOAZYC/wYeCVAVd2W5C+Aq1u9t4wOkJAkSdLE0tcLgw899NBavXr1VDdDkiRpp0ny1YFbxf2cj/6SJEnqOQOdJElSzxnoJEmSes5AJ0mS1HMGOkmSpJ4z0EmSJPWcgU6SJKnnDHSSJEk919sbCyfZAHxnqtuxgz0K+MFUN0Lbjcdz+vBYTh8ey+ljphzLx1XVnLGFvQ10M0GS1ePdDVr95PGcPjyW04fHcvqY6cfSU66SJEk9Z6CTJEnqOQPdru3sqW6AtiuP5/ThsZw+PJbTx4w+ll5DJ0mS1HP20EmSJPWcgU6SJKnnDHQ9lGSfJP9j4P3+SS5s069I8q5NLPejndXG6SLJvCRrtuP6bk7yqLHHUNvPlh6zJMcledIEdQ5PcvEm5t2c5FFb2s5x1uPP5062vX++palkoOuZJLOAfYCfh4GqWl9VL5q6Vmk8SXbbzOwHHENNqeOAzQY6SdrVGei2QpITklyf5LokH0zyuCSfa2WfS/LYVu+cJO9I8qUkNyV5USv/aJJjBtZ3TpIXJpmV5K+TXN3W9Zo2//AkK5N8BPg6cCbw+CTXtvpj/8s8MMk/JflWktM3sQ9vGNjOGTvqs5omZiV5f5Ibknw2yZ5JXt0+v+uSfDzJQ+Hnx/Jvk6wEzkryyLbMNUneB6Stc+wxTPu6JsnXk7ykre/wJF9M8skk30jy3iT+3E5sUscsyf8H/Dbw1+1YPD7JE5L8c6v3tSSPb+t8eJILk3wzyYeTZGB7b0hyVXs9ASDJC5Jc2Y79Pyd5dCt/eJIPtON8fZIXDja89eB+Ocnzd8YHNR0keViSS9oxW5PkJUmenuQLSb6a5LIkj2l1n97qfRk4eWAd85L8SzvmX2vfG6M/g1ds5thrO0nyxiR/0KbfnuTzbfq5ST6U5D1JVref6zMGljumHZtV6f7mXtzK90vyqfZz9pUkv9rK35xkRTuuN41us/eqytcWvIAnA98CHtXe7wf8I3Bie/8q4FNt+hzgY3TB+UnA2lb+O8C5bfohwC3AnsBJwJta+e7AauAg4HDgP4GD2rx5wJqBNv38PfAK4FbgkW2da4BD27wft
a9H0g3vTmvbxcB/nerPdld8tc/2XuCQ9v4C4GXAIwfqvBVYOnDMLwZmtffvAP68TT8fKLrH04w9hi8ELgdmAY8G/g14TDv2PwF+uc27HHjRVH8uu/JrK4/ZiwbmXQn8TpveA3hoOw53AnPbz8yXgYWtzs3AsjZ9AnBxm96X++8k8PvA37Tps4C/G9jevu3rj9qxvxL4zan+HPv0aj8/7x94vzfwJWBOe/8SYEWbvh74b236rwd+dz4U2KNNHwysbtObPPa+tvtxPAz4WJv+F+AqYDZwOvAaYL82bxZwBfCr7Wf0Fu7/+zg88DP4TuD0Nn0EcG2bfnP7/tid7vfxfwCzp3r/t/Xlf/pb7gjgwqr6AUBV3QY8C/hIm/9BYOFA/U9V1c+q6ht0v6wBPgMckWR34Gjgi1V1N13QOiHJtXS/1B9J94sF4Kqq+vYk23h5Vf1HW+cnxrSHtp0jgWuArwFPHNiOHuzbVXVtm/4qXWBY0P6b/zpwPF3QH/WxqrqvTf9X4EMAVXUJcPsmtrEQGK6q+6rqe8AXgF9v866qqpvaOod58PHUg23pMQMgyV7AAVX1SYCq+klV/bjNvqqq1lXVz4Br2zpHDQ98fVabngtc1rb3hoHt/Qbw7tEFq2r0e2I28DngjVV1+Vbt9cz1deA3kpyV5DnAgcAC4PL2+/RNwNwkewP7VNUX2nIfHFjHbOD97Xh9jAeeht/csdf281Xg6e3n8B668Hwo8By6gPd7Sb5G97fryXTH6InATQN/H4cH1reQdoyr6vPAI9v3AMAlVXVP+1v+fe7/+9xbm7vGR+MLXS/L5gzOv2fMslTVT5JcATyP7j/H4YH5S6vqsgdsMDmcrodussa2b+z7AG+rqvdtwTpnssFjeB9dz+c5wHFVdV2SV9D9Fz9q7LGazM0eN3cKZ6LjqQfb0mM2anPHYew6B39/1jjT7wT+tqouaj/Dbx7YxnjH8F66P2jPowv0mqSq+r9Jng4cA7yNrif7hqp61mC9JPuw6Z+f/wl8D3gqXU/cTwbmbe7Yazupqo1JbgZeSdeDdj2wCHg8cDfwv4Bfr6rbk5xD1zu3uZ/Z8eaNHv9pd0ztodtyn6P7L+GR0J2jp/vGe2mbfzywahLrOZ/um/Y5wGiAuwx4XZLZbd2/kuRh4yx7F7DXZtb9m+3agT3pLvj+1zHzLwNeleThbTsHJPnFSbRZ99sLuLUdq+M3U++Lo/OTHE13Gg4efAy/CLwk3XWUc+h69q5q856R5KB01869hMl9f+nBNnXMfn4squqHwLokxwEk2T3t+sgJvGTg65fb9N7Ad9v0iQN1Pwu8fvRNktHviaK7ZOOJSU6d7E6pG+kP/LiqPgT8b+CZwJwkz2rzZyd5clXdAdyZZLSXe/D7YG/g1tYL93K603ra+b5IF9y+SNcr91q6XtFH0P2zfGe7HvXoVv+bwC8nmdfev2TMukZ//x4O/KD9jE9LvU+kO1tV3ZBkCPhCkvvoun7/AFiR5A3ABrqgNpHPAucBF1XVT1vZP9B15X+tXXS7gS6QjW3DfyT513QDIT7DwOmbZhVdN/MTgI9U1eoxy382yXzgy+3a3h/RXWP0/Um0W50/ozst/h260z2bCthnAMPtNMEX6K6NG+8YvpHuVN11dH/Y31hV/57kiXQB4UzgKXS/oD65w/ZqetvUMTuf7lTbHwAvovtj/r4kbwE2Ai+exLp3T3Il3T/Ji1vZm4GPJfku8BW662Ghu37v3e3Y30f3PfIJgKq6L8lLgX9M8sOq+vtt2N+Z5Cl0A1t+RnfMXkfX4/mOdoptN+DvgBvofj+vSPJj7v9nGuDvgY8neTGwki07K6Lt51+AZcCXq+o/k/wE+JfWs34N3TG8idZRUVV3p7sF1D8l+QH3/yMM3c/gB5JcD/yYB/5jNe346C9pF9b+q/xfVfVbU90WSdoVJXl4Vf2odYS8G7ixqt4+1e3a2TzlKkmS+uzVbfDLDXSnzmfk9eH20EmSJ
PWcPXSSJEk9Z6CTJEnqOQOdJElSzxnoJE0refCzjbdk2cNHnwO5jW14Rbs32kT1bk7yqG3dniQZ6CRp+3sFMGGgk6TtxUAnaTraLcm5Sa5PcmGShyZ5bpJrknw9yYr2LGWSHJXkm0lWAb/byn4hyY3tqR2j79eO7U1rT/Y4J8matt7/meRFdM+f/HCSa5M8P8knB5b5zSSfGNvgJC9LclVb5n1JfFKBpEkz0Emajv4LcHZV/SrwQ+CP6Z7l+pKqegrdkwNel2QP4P3AC+gew/dLAO3xTx/i/kdD/QZwXXuQ96BDgAOqakFb7weq6kJgNXB8VR0CXArMHw2HdE8q+MDgStqTW14CPLstcx+bf6ScJD2AgU7SdHRLVY0+w/hDwHOBb1fV/21l59I9L/eJrfzG6m7K+aGBdawATmjTr2JMCGtuonuO5DuTHEUXHh+grfeDwMvaw+GfRfe4t0HPBZ4OXN1ukPpc4Je3ZIclzWw+y1XSdLQld0wft25V3ZLke0mOoHvY+/HtNOhXW5WLqurPkzwVeB5wMvB7dOFvrA8A/wj8BPhYVd07Zn6Ac6vqtC1otyT9nD10kqajxyZ5VpteDPwzMC/JE1rZy4EvAN8EDkry+IG6g/6Brtfugqq6r70Oaa8/b9fU/UJVfRz4M+Bpbbm7gL1GV1JV64H1wJvoTv2O9TngRUl+ESDJfkket7U7L2nmMdBJmo5GgBOTXA/sB7yd7tq1jyX5OvAz4L1V9RPgJOCSNijiO2PWcxHwcMY/3QpwAHBFO016DjDaw3YO8N42wGHPVvZhulPB3xi7klb2JuCzrc2XA4/Z4r2WNGP5LFdJ2oQkhwJvr6rnbId1vQu4pqqWb3vLJOmBvIZOksaR5FTgdWyH0aZJvgr8J/An27ouSRqPPXSSJEk95zV0kiRJPWegkyRJ6jkDnSRJUs8Z6CRJknrOQCdJktRz/z9pGjgaatK+dQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df.boxplot(by='body-style', column=['price'], grid=False, rot=0, figsize=(10,6))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: body-style is a good predictor; hatchbacks and wagons are mostly the budget cars" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 5: Plot a jointplot that shows the relationship between the 'horsepower' and 'price' of the car." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.jointplot(x='horsepower', y='price', data=df, kind='reg')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: horsepower seems to have a linear relationship with price, so it's a good predictor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 6: Plot the correlation heatmap of the data." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "symboling False\n", + "wheel-base False\n", + "length True\n", + "width True\n", + "height False\n", + "horsepower True\n", + "peak-rpm False\n", + "highway-mpg True\n", + "city-mpg True\n", + "price True\n", + "Name: price, dtype: bool" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# one way to check the correlation is to use df.corr() and compare against a threshold (0.6 here)\n", + "(df.corr().abs()>0.6)['price']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# if you'd like to see correlation with all the numeric columns then use heatmap\n", + "plt.figure(figsize=(12,8))\n", + "sns.heatmap(df.corr())\n", + "plt.title('Correlation heatmap')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part B: Data Cleaning and Feature Engineering" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 7: Load the data stored in `data_2` using the `.read_csv()` API.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"../data/data_2.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 205 entries, 0 to 204\n", + "Data columns (total 15 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 symboling 205 non-null int64 \n", + " 1 normalized-losses 205 non-null object \n", + " 2 make 205 non-null object \n", + " 3 fuel-type 205 non-null object \n", + " 4 body-style 205 non-null object \n", + " 5 drive-wheels 205 non-null object \n", + " 6 engine-location 205 non-null object \n", + " 7 width 205 non-null float64\n", + " 8 height 205 non-null float64\n", + " 9 engine-type 205 non-null object \n", + " 10 engine-size 205 non-null int64 \n", + " 11 horsepower 205 non-null object \n", + " 12 city-mpg 205 non-null int64 \n", + " 13 highway-mpg 205 non-null int64 \n", + " 14 price 205 non-null int64 \n", + "dtypes: float64(2), int64(5), object(8)\n", + "memory usage: 24.1+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: There appear to be no null values, or are there?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
symbolingnormalized-lossesmakefuel-typebody-styledrive-wheelsengine-locationwidthheightengine-typeengine-sizehorsepowercity-mpghighway-mpgprice
03?alfa-romerogasconvertiblerwdfront64.148.8dohc130111212713495
13?alfa-romerogasconvertiblerwdfront64.148.8dohc130111212716500
21?alfa-romerogashatchbackrwdfront65.552.4ohcv152154192616500
32164audigassedanfwdfront66.254.3ohc109102243013950
42164audigassedan4wdfront66.454.3ohc136115182217450
\n", + "
" + ], + "text/plain": [ + " symboling normalized-losses make fuel-type body-style \\\n", + "0 3 ? alfa-romero gas convertible \n", + "1 3 ? alfa-romero gas convertible \n", + "2 1 ? alfa-romero gas hatchback \n", + "3 2 164 audi gas sedan \n", + "4 2 164 audi gas sedan \n", + "\n", + " drive-wheels engine-location width height engine-type engine-size \\\n", + "0 rwd front 64.1 48.8 dohc 130 \n", + "1 rwd front 64.1 48.8 dohc 130 \n", + "2 rwd front 65.5 52.4 ohcv 152 \n", + "3 fwd front 66.2 54.3 ohc 109 \n", + "4 4wd front 66.4 54.3 ohc 136 \n", + "\n", + " horsepower city-mpg highway-mpg price \n", + "0 111 21 27 13495 \n", + "1 111 21 27 16500 \n", + "2 154 19 26 16500 \n", + "3 102 24 30 13950 \n", + "4 115 18 22 17450 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# a quick peek in the data shows missing values are actually marked as '?'\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 8: Impute the missing values of the numerical data with mean of the particular column (Make sure you replace \"?\" by \"NaN\" before Imputing).\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# replace the \"?\" with special string \"NaN\", because thats how Imputer likes it\n", + "df.replace('?', 'NaN', inplace=True) # this is not np.NaN" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize the Imputer to replace missing values with the mean of the column\n", + "numeric_imp = Imputer(missing_values=\"NaN\", strategy='mean', axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((205,), (205,), (205, 1))" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# remember 
fit_transform wants a 2D array \n", + "df.horsepower.shape , df['horsepower'].shape, df[['horsepower']].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets find the mean i.e. do the fit and then replace missing values with it i.e. transform\n", + "df.horsepower = numeric_imp.fit_transform(df[['horsepower']])" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# and do the same for normalized-losses column\n", + "df['normalized-losses'] = numeric_imp.fit_transform(df[['normalized-losses']])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 205 entries, 0 to 204\n", + "Data columns (total 15 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 symboling 205 non-null int64 \n", + " 1 normalized-losses 205 non-null float64\n", + " 2 make 205 non-null object \n", + " 3 fuel-type 205 non-null object \n", + " 4 body-style 205 non-null object \n", + " 5 drive-wheels 205 non-null object \n", + " 6 engine-location 205 non-null object \n", + " 7 width 205 non-null float64\n", + " 8 height 205 non-null float64\n", + " 9 engine-type 205 non-null object \n", + " 10 engine-size 205 non-null int64 \n", + " 11 horsepower 205 non-null float64\n", + " 12 city-mpg 205 non-null int64 \n", + " 13 highway-mpg 205 non-null int64 \n", + " 14 price 205 non-null int64 \n", + "dtypes: float64(4), int64(5), object(6)\n", + "memory usage: 24.1+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Now, normalized-losses and horsepower are numeric types" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 9: Check the skewness of the numeric features and apply square root 
transformation on features with skewness greater than 1.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
symbolingnormalized-lossesmakefuel-typebody-styledrive-wheelsengine-locationwidthheightengine-typeengine-sizehorsepowercity-mpghighway-mpgprice
03122.0alfa-romerogasconvertiblerwdfront64.148.8dohc130111.0212713495
13122.0alfa-romerogasconvertiblerwdfront64.148.8dohc130111.0212716500
21122.0alfa-romerogashatchbackrwdfront65.552.4ohcv152154.0192616500
32164.0audigassedanfwdfront66.254.3ohc109102.0243013950
42164.0audigassedan4wdfront66.454.3ohc136115.0182217450
\n", + "
" + ], + "text/plain": [ + " symboling normalized-losses make fuel-type body-style \\\n", + "0 3 122.0 alfa-romero gas convertible \n", + "1 3 122.0 alfa-romero gas convertible \n", + "2 1 122.0 alfa-romero gas hatchback \n", + "3 2 164.0 audi gas sedan \n", + "4 2 164.0 audi gas sedan \n", + "\n", + " drive-wheels engine-location width height engine-type engine-size \\\n", + "0 rwd front 64.1 48.8 dohc 130 \n", + "1 rwd front 64.1 48.8 dohc 130 \n", + "2 rwd front 65.5 52.4 ohcv 152 \n", + "3 fwd front 66.2 54.3 ohc 109 \n", + "4 4wd front 66.4 54.3 ohc 136 \n", + "\n", + " horsepower city-mpg highway-mpg price \n", + "0 111.0 21 27 13495 \n", + "1 111.0 21 27 16500 \n", + "2 154.0 19 26 16500 \n", + "3 102.0 24 30 13950 \n", + "4 115.0 18 22 17450 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# before sqrt transformation\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "transforming engine-size\n", + "transforming horsepower\n", + "transforming price\n" + ] + } + ], + "source": [ + "# perform sqrt transformation to numeric columns\n", + "numeric_featuers = df.select_dtypes(exclude=['object']).columns\n", + "for feature in numeric_featuers:\n", + " if skew(df[feature]) > 1:\n", + " print('transforming', feature)\n", + " df[feature] = np.sqrt(df[feature])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
symbolingnormalized-lossesmakefuel-typebody-styledrive-wheelsengine-locationwidthheightengine-typeengine-sizehorsepowercity-mpghighway-mpgprice
03122.0alfa-romerogasconvertiblerwdfront64.148.8dohc11.40175410.5356542127116.167982
13122.0alfa-romerogasconvertiblerwdfront64.148.8dohc11.40175410.5356542127128.452326
21122.0alfa-romerogashatchbackrwdfront65.552.4ohcv12.32882812.4096741926128.452326
32164.0audigassedanfwdfront66.254.3ohc10.44030710.0995052430118.110118
42164.0audigassedan4wdfront66.454.3ohc11.66190410.7238051822132.098448
\n", + "
" + ], + "text/plain": [ + " symboling normalized-losses make fuel-type body-style \\\n", + "0 3 122.0 alfa-romero gas convertible \n", + "1 3 122.0 alfa-romero gas convertible \n", + "2 1 122.0 alfa-romero gas hatchback \n", + "3 2 164.0 audi gas sedan \n", + "4 2 164.0 audi gas sedan \n", + "\n", + " drive-wheels engine-location width height engine-type engine-size \\\n", + "0 rwd front 64.1 48.8 dohc 11.401754 \n", + "1 rwd front 64.1 48.8 dohc 11.401754 \n", + "2 rwd front 65.5 52.4 ohcv 12.328828 \n", + "3 fwd front 66.2 54.3 ohc 10.440307 \n", + "4 4wd front 66.4 54.3 ohc 11.661904 \n", + "\n", + " horsepower city-mpg highway-mpg price \n", + "0 10.535654 21 27 116.167982 \n", + "1 10.535654 21 27 128.452326 \n", + "2 12.409674 19 26 128.452326 \n", + "3 10.099505 24 30 118.110118 \n", + "4 10.723805 18 22 132.098448 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# after sqrt transformation\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 10: Label Encode the categorical features.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
symbolingnormalized-lossesmakefuel-typebody-styledrive-wheelsengine-locationwidthheightengine-typeengine-sizehorsepowercity-mpghighway-mpgprice
03122.0alfa-romerogasconvertiblerwdfront64.148.8dohc11.40175410.5356542127116.167982
13122.0alfa-romerogasconvertiblerwdfront64.148.8dohc11.40175410.5356542127128.452326
21122.0alfa-romerogashatchbackrwdfront65.552.4ohcv12.32882812.4096741926128.452326
32164.0audigassedanfwdfront66.254.3ohc10.44030710.0995052430118.110118
42164.0audigassedan4wdfront66.454.3ohc11.66190410.7238051822132.098448
\n", + "
" + ], + "text/plain": [ + " symboling normalized-losses make fuel-type body-style \\\n", + "0 3 122.0 alfa-romero gas convertible \n", + "1 3 122.0 alfa-romero gas convertible \n", + "2 1 122.0 alfa-romero gas hatchback \n", + "3 2 164.0 audi gas sedan \n", + "4 2 164.0 audi gas sedan \n", + "\n", + " drive-wheels engine-location width height engine-type engine-size \\\n", + "0 rwd front 64.1 48.8 dohc 11.401754 \n", + "1 rwd front 64.1 48.8 dohc 11.401754 \n", + "2 rwd front 65.5 52.4 ohcv 12.328828 \n", + "3 fwd front 66.2 54.3 ohc 10.440307 \n", + "4 4wd front 66.4 54.3 ohc 11.661904 \n", + "\n", + " horsepower city-mpg highway-mpg price \n", + "0 10.535654 21 27 116.167982 \n", + "1 10.535654 21 27 128.452326 \n", + "2 12.409674 19 26 128.452326 \n", + "3 10.099505 24 30 118.110118 \n", + "4 10.723805 18 22 132.098448 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# before label encoding\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "# perform label encoding to categorical columns\n", + "categorical_features = df.select_dtypes(include=['object']).columns\n", + "for feature in categorical_features:\n", + " le = LabelEncoder()\n", + " df[feature] = le.fit_transform(df[feature])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
symbolingnormalized-lossesmakefuel-typebody-styledrive-wheelsengine-locationwidthheightengine-typeengine-sizehorsepowercity-mpghighway-mpgprice
03122.00102064.148.8011.40175410.5356542127116.167982
13122.00102064.148.8011.40175410.5356542127128.452326
21122.00122065.552.4512.32882812.4096741926128.452326
32164.01131066.254.3310.44030710.0995052430118.110118
42164.01130066.454.3311.66190410.7238051822132.098448
\n", + "
" + ], + "text/plain": [ + " symboling normalized-losses make fuel-type body-style drive-wheels \\\n", + "0 3 122.0 0 1 0 2 \n", + "1 3 122.0 0 1 0 2 \n", + "2 1 122.0 0 1 2 2 \n", + "3 2 164.0 1 1 3 1 \n", + "4 2 164.0 1 1 3 0 \n", + "\n", + " engine-location width height engine-type engine-size horsepower \\\n", + "0 0 64.1 48.8 0 11.401754 10.535654 \n", + "1 0 64.1 48.8 0 11.401754 10.535654 \n", + "2 0 65.5 52.4 5 12.328828 12.409674 \n", + "3 0 66.2 54.3 3 10.440307 10.099505 \n", + "4 0 66.4 54.3 3 11.661904 10.723805 \n", + "\n", + " city-mpg highway-mpg price \n", + "0 21 27 116.167982 \n", + "1 21 27 128.452326 \n", + "2 19 26 128.452326 \n", + "3 24 30 118.110118 \n", + "4 18 22 132.098448 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# after label encoding\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 205 entries, 0 to 204\n", + "Data columns (total 15 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 symboling 205 non-null int64 \n", + " 1 normalized-losses 205 non-null float64\n", + " 2 make 205 non-null int64 \n", + " 3 fuel-type 205 non-null int64 \n", + " 4 body-style 205 non-null int64 \n", + " 5 drive-wheels 205 non-null int64 \n", + " 6 engine-location 205 non-null int64 \n", + " 7 width 205 non-null float64\n", + " 8 height 205 non-null float64\n", + " 9 engine-type 205 non-null int64 \n", + " 10 engine-size 205 non-null float64\n", + " 11 horsepower 205 non-null float64\n", + " 12 city-mpg 205 non-null int64 \n", + " 13 highway-mpg 205 non-null int64 \n", + " 14 price 205 non-null float64\n", + "dtypes: float64(6), int64(9)\n", + "memory usage: 24.1 KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Now, 
all the features are numerical, we are almost ready to do model training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 11: Combine the 'height' and 'width' to make a new feature 'area' of the frame of the car." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3128.08\n", + "1 3128.08\n", + "2 3432.20\n", + "3 3594.66\n", + "4 3605.52\n", + " ... \n", + "200 3823.95\n", + "201 3818.40\n", + "202 3823.95\n", + "203 3823.95\n", + "204 3823.95\n", + "Name: area, Length: 205, dtype: float64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Since height and width are usually related, let us engineer a new feature called area\n", + "df['area'] = df.height * df.width\n", + "df.area" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Data_cleaning_with_obesity_data/notebook/Data_cleaning_with_obeseity_data-MK.ipynb b/Data_cleaning_with_obesity_data/notebook/Data_cleaning_with_obeseity_data-MK.ipynb new file mode 100644 index 0000000..69312ea --- /dev/null +++ b/Data_cleaning_with_obesity_data/notebook/Data_cleaning_with_obeseity_data-MK.ipynb @@ -0,0 +1,6277 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Cleaning with Obesity data \n", + "\n", + "Data cleaning is such an integral part of data analysis. Unlike on Kaggle, almost all data you see in the real world would be dirty and messy. 
Some even say data cleaning would take 80% of data analysis time.\n", + "\n", + "The very first step of any data analysis project is getting to know your data, especially when you are dealing with a messy one.\n", + "\n", + "So, let's clean this messy data to start our analysis." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read and Know Your Data\n", + "\n", + "Take a look at our data below. The data is quite obscure: it is hard for a human to understand, not to mention a computer. In this kind of situation, you can get acquainted with your data as follows:\n", + "\n", + "1. Go to the data source page [WHO OBESITY DATA](https://apps.who.int/gho/data/node.main.A900A?lang=en)\n", + "2. If option 1 does not work or is hard to do, you can always ask the data curator directly." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 020162016.12016.220152015.12015.220142014.12014.2...1978.219771977.11977.219761976.11976.219751975.11975.2
0NaNPrevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great......Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...Prevalence of obesity among adults, BMI &Great...
1NaN18+ years18+ years18+ years18+ years18+ years18+ years18+ years18+ years18+ years...18+ years18+ years18+ years18+ years18+ years18+ years18+ years18+ years18+ years18+ years
2CountryBoth sexesMaleFemaleBoth sexesMaleFemaleBoth sexesMaleFemale...FemaleBoth sexesMaleFemaleBoth sexesMaleFemaleBoth sexesMaleFemale
3Afghanistan5.5 [3.4-8.1]3.2 [1.3-6.4]7.6 [4.3-12.4]5.2 [3.3-7.7]3.0 [1.3-6.0]7.3 [4.1-11.8]4.9 [3.1-7.3]2.8 [1.2-5.6]7.0 [4.0-11.3]...0.9 [0.3-2.2]0.6 [0.2-1.2]0.2 [0.0-0.7]0.9 [0.3-2.1]0.5 [0.2-1.1]0.2 [0.0-0.7]0.8 [0.2-2.0]0.5 [0.2-1.1]0.2 [0.0-0.6]0.8 [0.2-2.0]
4Albania21.7 [17.0-26.7]21.6 [14.8-29.0]21.8 [15.3-28.9]21.1 [16.6-26.0]20.9 [14.4-28.1]21.3 [15.1-28.1]20.5 [16.2-25.1]20.2 [13.9-27.3]20.8 [14.9-27.4]...9.1 [4.6-15.5]6.8 [4.0-10.7]4.8 [2.0-9.3]8.9 [4.3-15.4]6.7 [3.8-10.6]4.6 [1.8-9.2]8.8 [4.1-15.4]6.5 [3.6-10.5]4.4 [1.7-9.2]8.6 [3.9-15.4]
\n", + "

5 rows × 127 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 2016 \\\n", + "0 NaN Prevalence of obesity among adults, BMI &Great... \n", + "1 NaN 18+ years \n", + "2 Country Both sexes \n", + "3 Afghanistan 5.5 [3.4-8.1] \n", + "4 Albania 21.7 [17.0-26.7] \n", + "\n", + " 2016.1 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Male \n", + "3 3.2 [1.3-6.4] \n", + "4 21.6 [14.8-29.0] \n", + "\n", + " 2016.2 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Female \n", + "3 7.6 [4.3-12.4] \n", + "4 21.8 [15.3-28.9] \n", + "\n", + " 2015 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Both sexes \n", + "3 5.2 [3.3-7.7] \n", + "4 21.1 [16.6-26.0] \n", + "\n", + " 2015.1 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Male \n", + "3 3.0 [1.3-6.0] \n", + "4 20.9 [14.4-28.1] \n", + "\n", + " 2015.2 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Female \n", + "3 7.3 [4.1-11.8] \n", + "4 21.3 [15.1-28.1] \n", + "\n", + " 2014 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Both sexes \n", + "3 4.9 [3.1-7.3] \n", + "4 20.5 [16.2-25.1] \n", + "\n", + " 2014.1 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Male \n", + "3 2.8 [1.2-5.6] \n", + "4 20.2 [13.9-27.3] \n", + "\n", + " 2014.2 ... \\\n", + "0 Prevalence of obesity among adults, BMI &Great... ... \n", + "1 18+ years ... \n", + "2 Female ... \n", + "3 7.0 [4.0-11.3] ... \n", + "4 20.8 [14.9-27.4] ... \n", + "\n", + " 1978.2 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Female \n", + "3 0.9 [0.3-2.2] \n", + "4 9.1 [4.6-15.5] \n", + "\n", + " 1977 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... 
\n", + "1 18+ years \n", + "2 Both sexes \n", + "3 0.6 [0.2-1.2] \n", + "4 6.8 [4.0-10.7] \n", + "\n", + " 1977.1 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Male \n", + "3 0.2 [0.0-0.7] \n", + "4 4.8 [2.0-9.3] \n", + "\n", + " 1977.2 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Female \n", + "3 0.9 [0.3-2.1] \n", + "4 8.9 [4.3-15.4] \n", + "\n", + " 1976 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Both sexes \n", + "3 0.5 [0.2-1.1] \n", + "4 6.7 [3.8-10.6] \n", + "\n", + " 1976.1 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Male \n", + "3 0.2 [0.0-0.7] \n", + "4 4.6 [1.8-9.2] \n", + "\n", + " 1976.2 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Female \n", + "3 0.8 [0.2-2.0] \n", + "4 8.8 [4.1-15.4] \n", + "\n", + " 1975 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Both sexes \n", + "3 0.5 [0.2-1.1] \n", + "4 6.5 [3.6-10.5] \n", + "\n", + " 1975.1 \\\n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Male \n", + "3 0.2 [0.0-0.6] \n", + "4 4.4 [1.7-9.2] \n", + "\n", + " 1975.2 \n", + "0 Prevalence of obesity among adults, BMI &Great... \n", + "1 18+ years \n", + "2 Female \n", + "3 0.8 [0.2-2.0] \n", + "4 8.6 [3.9-15.4] \n", + "\n", + "[5 rows x 127 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('../data/data.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Why and how the data is messy?\n", + "\n", + "In most cases, data is collected by human or machines,a tiny glich would cause a long strip of bad data.if the data is collected by human, then it is a big chance that it would be messy. 
Data can be dirty in many different ways, but it mostly falls into these categories:\n", + "\n", + "1. Missing data : like NaN\n", + "2. Validity of data : like 2016.1 / 2016.2 in the column\n", + "3. Outliers : like if a BMI entry is greater than 100\n", + "4. Consistency of data : the unit of every entry is not the same\n", + "5. Correctness of data: we are not going to go through this, but it is an important part of doing analysis in the business world. Basically, you need an external data source or database to cross-check the data in your hand because, as we always say:\n", + "\n", + "You don't know what you don't know\n", + "\n", + "6. Data is in wide form not in long form : we are going to go deeper into this one.\n", + "\n", + "Are you ready? It's time to get our hands dirty" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## long form VS wide form\n", + "\n", + "The original data we have here is in wide form, which means the table is literally very wide.\n", + "\n", + "The .1 and .2 suffixes in the year columns stand for gender; we are going to fix that later.\n", + "\n", + "Wide data is not easy to analyze or to store efficiently on a computer, so we want to change it as soon as we can. Go read this tidy-data if you want to know more.\n", + "\n", + "![img](https://d33wubrfki0l68.cloudfront.net/6f1ddb544fc5c69a2478e444ab8112fb0eea23f8/91adc/images/tidy-1.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Rename the columns appropriately and unpivot the data in the desirable format using pandas melt()." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 020162016.12016.220152015.12015.220142014.12014.2...1978.219771977.11977.219761976.11976.219751975.11975.2
3Afghanistan5.5 [3.4-8.1]3.2 [1.3-6.4]7.6 [4.3-12.4]5.2 [3.3-7.7]3.0 [1.3-6.0]7.3 [4.1-11.8]4.9 [3.1-7.3]2.8 [1.2-5.6]7.0 [4.0-11.3]...0.9 [0.3-2.2]0.6 [0.2-1.2]0.2 [0.0-0.7]0.9 [0.3-2.1]0.5 [0.2-1.1]0.2 [0.0-0.7]0.8 [0.2-2.0]0.5 [0.2-1.1]0.2 [0.0-0.6]0.8 [0.2-2.0]
4Albania21.7 [17.0-26.7]21.6 [14.8-29.0]21.8 [15.3-28.9]21.1 [16.6-26.0]20.9 [14.4-28.1]21.3 [15.1-28.1]20.5 [16.2-25.1]20.2 [13.9-27.3]20.8 [14.9-27.4]...9.1 [4.6-15.5]6.8 [4.0-10.7]4.8 [2.0-9.3]8.9 [4.3-15.4]6.7 [3.8-10.6]4.6 [1.8-9.2]8.8 [4.1-15.4]6.5 [3.6-10.5]4.4 [1.7-9.2]8.6 [3.9-15.4]
5Algeria27.4 [22.5-32.7]19.9 [13.6-27.1]34.9 [27.6-42.7]26.7 [21.9-31.8]19.2 [13.2-26.1]34.2 [27.1-41.7]26.0 [21.4-30.9]18.5 [12.7-25.0]33.6 [26.7-40.7]...11.8 [6.5-18.6]7.4 [4.3-11.3]3.1 [1.2-6.2]11.4 [6.2-18.4]7.2 [4.1-11.1]2.9 [1.1-6.1]11.1 [5.8-18.2]6.9 [3.9-10.9]2.8 [1.0-6.0]10.7 [5.5-18.0]
6Andorra25.6 [20.1-31.3]25.9 [18.0-34.3]25.3 [17.7-33.7]25.4 [20.1-31.0]25.5 [17.8-33.8]25.2 [17.7-33.4]25.2 [20.0-30.7]25.2 [17.6-33.3]25.1 [17.8-33.1]...17.5 [10.9-25.3]14.0 [9.6-19.1]10.7 [5.6-17.4]16.9 [10.4-24.8]13.5 [9.1-18.6]10.2 [5.2-16.9]16.4 [9.8-24.4]12.9 [8.6-18.1]9.7 [4.7-16.3]15.8 [9.2-23.9]
7Angola8.2 [5.1-12.2]4.0 [1.6-7.9]12.1 [6.8-19.0]7.9 [4.9-11.7]3.8 [1.5-7.3]11.6 [6.5-18.2]7.5 [4.7-11.2]3.6 [1.4-6.9]11.1 [6.2-17.5]...1.6 [0.5-3.7]0.9 [0.3-2.0]0.3 [0.0-0.9]1.5 [0.4-3.6]0.9 [0.3-2.0]0.3 [0.0-0.9]1.4 [0.4-3.5]0.8 [0.3-1.9]0.2 [0.0-0.8]1.4 [0.4-3.4]
\n", + "

5 rows × 127 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 2016 2016.1 2016.2 \\\n", + "3 Afghanistan 5.5 [3.4-8.1] 3.2 [1.3-6.4] 7.6 [4.3-12.4] \n", + "4 Albania 21.7 [17.0-26.7] 21.6 [14.8-29.0] 21.8 [15.3-28.9] \n", + "5 Algeria 27.4 [22.5-32.7] 19.9 [13.6-27.1] 34.9 [27.6-42.7] \n", + "6 Andorra 25.6 [20.1-31.3] 25.9 [18.0-34.3] 25.3 [17.7-33.7] \n", + "7 Angola 8.2 [5.1-12.2] 4.0 [1.6-7.9] 12.1 [6.8-19.0] \n", + "\n", + " 2015 2015.1 2015.2 2014 \\\n", + "3 5.2 [3.3-7.7] 3.0 [1.3-6.0] 7.3 [4.1-11.8] 4.9 [3.1-7.3] \n", + "4 21.1 [16.6-26.0] 20.9 [14.4-28.1] 21.3 [15.1-28.1] 20.5 [16.2-25.1] \n", + "5 26.7 [21.9-31.8] 19.2 [13.2-26.1] 34.2 [27.1-41.7] 26.0 [21.4-30.9] \n", + "6 25.4 [20.1-31.0] 25.5 [17.8-33.8] 25.2 [17.7-33.4] 25.2 [20.0-30.7] \n", + "7 7.9 [4.9-11.7] 3.8 [1.5-7.3] 11.6 [6.5-18.2] 7.5 [4.7-11.2] \n", + "\n", + " 2014.1 2014.2 ... 1978.2 1977 \\\n", + "3 2.8 [1.2-5.6] 7.0 [4.0-11.3] ... 0.9 [0.3-2.2] 0.6 [0.2-1.2] \n", + "4 20.2 [13.9-27.3] 20.8 [14.9-27.4] ... 9.1 [4.6-15.5] 6.8 [4.0-10.7] \n", + "5 18.5 [12.7-25.0] 33.6 [26.7-40.7] ... 11.8 [6.5-18.6] 7.4 [4.3-11.3] \n", + "6 25.2 [17.6-33.3] 25.1 [17.8-33.1] ... 17.5 [10.9-25.3] 14.0 [9.6-19.1] \n", + "7 3.6 [1.4-6.9] 11.1 [6.2-17.5] ... 
1.6 [0.5-3.7] 0.9 [0.3-2.0] \n", + "\n", + " 1977.1 1977.2 1976 1976.1 \\\n", + "3 0.2 [0.0-0.7] 0.9 [0.3-2.1] 0.5 [0.2-1.1] 0.2 [0.0-0.7] \n", + "4 4.8 [2.0-9.3] 8.9 [4.3-15.4] 6.7 [3.8-10.6] 4.6 [1.8-9.2] \n", + "5 3.1 [1.2-6.2] 11.4 [6.2-18.4] 7.2 [4.1-11.1] 2.9 [1.1-6.1] \n", + "6 10.7 [5.6-17.4] 16.9 [10.4-24.8] 13.5 [9.1-18.6] 10.2 [5.2-16.9] \n", + "7 0.3 [0.0-0.9] 1.5 [0.4-3.6] 0.9 [0.3-2.0] 0.3 [0.0-0.9] \n", + "\n", + " 1976.2 1975 1975.1 1975.2 \n", + "3 0.8 [0.2-2.0] 0.5 [0.2-1.1] 0.2 [0.0-0.6] 0.8 [0.2-2.0] \n", + "4 8.8 [4.1-15.4] 6.5 [3.6-10.5] 4.4 [1.7-9.2] 8.6 [3.9-15.4] \n", + "5 11.1 [5.8-18.2] 6.9 [3.9-10.9] 2.8 [1.0-6.0] 10.7 [5.5-18.0] \n", + "6 16.4 [9.8-24.4] 12.9 [8.6-18.1] 9.7 [4.7-16.3] 15.8 [9.2-23.9] \n", + "7 1.4 [0.4-3.5] 0.8 [0.3-1.9] 0.2 [0.0-0.8] 1.4 [0.4-3.4] \n", + "\n", + "[5 rows x 127 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = data.copy()\n", + "df.drop([0,1,2], inplace=True)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
country20162016.12016.220152015.12015.220142014.12014.2...1978.219771977.11977.219761976.11976.219751975.11975.2
3Afghanistan5.5 [3.4-8.1]3.2 [1.3-6.4]7.6 [4.3-12.4]5.2 [3.3-7.7]3.0 [1.3-6.0]7.3 [4.1-11.8]4.9 [3.1-7.3]2.8 [1.2-5.6]7.0 [4.0-11.3]...0.9 [0.3-2.2]0.6 [0.2-1.2]0.2 [0.0-0.7]0.9 [0.3-2.1]0.5 [0.2-1.1]0.2 [0.0-0.7]0.8 [0.2-2.0]0.5 [0.2-1.1]0.2 [0.0-0.6]0.8 [0.2-2.0]
4Albania21.7 [17.0-26.7]21.6 [14.8-29.0]21.8 [15.3-28.9]21.1 [16.6-26.0]20.9 [14.4-28.1]21.3 [15.1-28.1]20.5 [16.2-25.1]20.2 [13.9-27.3]20.8 [14.9-27.4]...9.1 [4.6-15.5]6.8 [4.0-10.7]4.8 [2.0-9.3]8.9 [4.3-15.4]6.7 [3.8-10.6]4.6 [1.8-9.2]8.8 [4.1-15.4]6.5 [3.6-10.5]4.4 [1.7-9.2]8.6 [3.9-15.4]
5Algeria27.4 [22.5-32.7]19.9 [13.6-27.1]34.9 [27.6-42.7]26.7 [21.9-31.8]19.2 [13.2-26.1]34.2 [27.1-41.7]26.0 [21.4-30.9]18.5 [12.7-25.0]33.6 [26.7-40.7]...11.8 [6.5-18.6]7.4 [4.3-11.3]3.1 [1.2-6.2]11.4 [6.2-18.4]7.2 [4.1-11.1]2.9 [1.1-6.1]11.1 [5.8-18.2]6.9 [3.9-10.9]2.8 [1.0-6.0]10.7 [5.5-18.0]
6Andorra25.6 [20.1-31.3]25.9 [18.0-34.3]25.3 [17.7-33.7]25.4 [20.1-31.0]25.5 [17.8-33.8]25.2 [17.7-33.4]25.2 [20.0-30.7]25.2 [17.6-33.3]25.1 [17.8-33.1]...17.5 [10.9-25.3]14.0 [9.6-19.1]10.7 [5.6-17.4]16.9 [10.4-24.8]13.5 [9.1-18.6]10.2 [5.2-16.9]16.4 [9.8-24.4]12.9 [8.6-18.1]9.7 [4.7-16.3]15.8 [9.2-23.9]
7Angola8.2 [5.1-12.2]4.0 [1.6-7.9]12.1 [6.8-19.0]7.9 [4.9-11.7]3.8 [1.5-7.3]11.6 [6.5-18.2]7.5 [4.7-11.2]3.6 [1.4-6.9]11.1 [6.2-17.5]...1.6 [0.5-3.7]0.9 [0.3-2.0]0.3 [0.0-0.9]1.5 [0.4-3.6]0.9 [0.3-2.0]0.3 [0.0-0.9]1.4 [0.4-3.5]0.8 [0.3-1.9]0.2 [0.0-0.8]1.4 [0.4-3.4]
\n", + "

5 rows × 127 columns

\n", + "
" + ], + "text/plain": [ + " country 2016 2016.1 2016.2 \\\n", + "3 Afghanistan 5.5 [3.4-8.1] 3.2 [1.3-6.4] 7.6 [4.3-12.4] \n", + "4 Albania 21.7 [17.0-26.7] 21.6 [14.8-29.0] 21.8 [15.3-28.9] \n", + "5 Algeria 27.4 [22.5-32.7] 19.9 [13.6-27.1] 34.9 [27.6-42.7] \n", + "6 Andorra 25.6 [20.1-31.3] 25.9 [18.0-34.3] 25.3 [17.7-33.7] \n", + "7 Angola 8.2 [5.1-12.2] 4.0 [1.6-7.9] 12.1 [6.8-19.0] \n", + "\n", + " 2015 2015.1 2015.2 2014 \\\n", + "3 5.2 [3.3-7.7] 3.0 [1.3-6.0] 7.3 [4.1-11.8] 4.9 [3.1-7.3] \n", + "4 21.1 [16.6-26.0] 20.9 [14.4-28.1] 21.3 [15.1-28.1] 20.5 [16.2-25.1] \n", + "5 26.7 [21.9-31.8] 19.2 [13.2-26.1] 34.2 [27.1-41.7] 26.0 [21.4-30.9] \n", + "6 25.4 [20.1-31.0] 25.5 [17.8-33.8] 25.2 [17.7-33.4] 25.2 [20.0-30.7] \n", + "7 7.9 [4.9-11.7] 3.8 [1.5-7.3] 11.6 [6.5-18.2] 7.5 [4.7-11.2] \n", + "\n", + " 2014.1 2014.2 ... 1978.2 1977 \\\n", + "3 2.8 [1.2-5.6] 7.0 [4.0-11.3] ... 0.9 [0.3-2.2] 0.6 [0.2-1.2] \n", + "4 20.2 [13.9-27.3] 20.8 [14.9-27.4] ... 9.1 [4.6-15.5] 6.8 [4.0-10.7] \n", + "5 18.5 [12.7-25.0] 33.6 [26.7-40.7] ... 11.8 [6.5-18.6] 7.4 [4.3-11.3] \n", + "6 25.2 [17.6-33.3] 25.1 [17.8-33.1] ... 17.5 [10.9-25.3] 14.0 [9.6-19.1] \n", + "7 3.6 [1.4-6.9] 11.1 [6.2-17.5] ... 
1.6 [0.5-3.7] 0.9 [0.3-2.0] \n", + "\n", + " 1977.1 1977.2 1976 1976.1 \\\n", + "3 0.2 [0.0-0.7] 0.9 [0.3-2.1] 0.5 [0.2-1.1] 0.2 [0.0-0.7] \n", + "4 4.8 [2.0-9.3] 8.9 [4.3-15.4] 6.7 [3.8-10.6] 4.6 [1.8-9.2] \n", + "5 3.1 [1.2-6.2] 11.4 [6.2-18.4] 7.2 [4.1-11.1] 2.9 [1.1-6.1] \n", + "6 10.7 [5.6-17.4] 16.9 [10.4-24.8] 13.5 [9.1-18.6] 10.2 [5.2-16.9] \n", + "7 0.3 [0.0-0.9] 1.5 [0.4-3.6] 0.9 [0.3-2.0] 0.3 [0.0-0.9] \n", + "\n", + " 1976.2 1975 1975.1 1975.2 \n", + "3 0.8 [0.2-2.0] 0.5 [0.2-1.1] 0.2 [0.0-0.6] 0.8 [0.2-2.0] \n", + "4 8.8 [4.1-15.4] 6.5 [3.6-10.5] 4.4 [1.7-9.2] 8.6 [3.9-15.4] \n", + "5 11.1 [5.8-18.2] 6.9 [3.9-10.9] 2.8 [1.0-6.0] 10.7 [5.5-18.0] \n", + "6 16.4 [9.8-24.4] 12.9 [8.6-18.1] 9.7 [4.7-16.3] 15.8 [9.2-23.9] \n", + "7 1.4 [0.4-3.5] 0.8 [0.3-1.9] 0.2 [0.0-0.8] 1.4 [0.4-3.4] \n", + "\n", + "[5 rows x 127 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename(columns={'Unnamed: 0':'country'}, inplace=True)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyearvalue
0Afghanistan20165.5 [3.4-8.1]
1Albania201621.7 [17.0-26.7]
2Algeria201627.4 [22.5-32.7]
3Andorra201625.6 [20.1-31.3]
4Angola20168.2 [5.1-12.2]
\n", + "
" + ], + "text/plain": [ + " country year value\n", + "0 Afghanistan 2016 5.5 [3.4-8.1]\n", + "1 Albania 2016 21.7 [17.0-26.7]\n", + "2 Algeria 2016 27.4 [22.5-32.7]\n", + "3 Andorra 2016 25.6 [20.1-31.3]\n", + "4 Angola 2016 8.2 [5.1-12.2]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.melt(id_vars=['country'], var_name='year')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyearvalue
81India20163.9 [3.0-5.0]
279India2016.12.7 [1.7-4.0]
477India2016.25.1 [3.6-6.9]
675India20153.7 [2.9-4.7]
873India2015.12.6 [1.7-3.7]
1071India2015.24.8 [3.5-6.5]
1269India20143.5 [2.7-4.4]
1467India2014.12.4 [1.6-3.4]
1665India2014.24.6 [3.3-6.1]
1863India20133.3 [2.6-4.1]
2061India2013.12.3 [1.5-3.2]
2259India2013.24.4 [3.2-5.7]
2457India20123.1 [2.5-3.8]
2655India2012.12.1 [1.5-2.9]
2853India2012.24.2 [3.1-5.4]
3051India20113.0 [2.4-3.6]
3249India2011.12.0 [1.4-2.7]
3447India2011.24.0 [3.0-5.1]
3645India20102.8 [2.3-3.4]
3843India2010.11.9 [1.3-2.5]
\n", + "
" + ], + "text/plain": [ + " country year value\n", + "81 India 2016 3.9 [3.0-5.0]\n", + "279 India 2016.1 2.7 [1.7-4.0]\n", + "477 India 2016.2 5.1 [3.6-6.9]\n", + "675 India 2015 3.7 [2.9-4.7]\n", + "873 India 2015.1 2.6 [1.7-3.7]\n", + "1071 India 2015.2 4.8 [3.5-6.5]\n", + "1269 India 2014 3.5 [2.7-4.4]\n", + "1467 India 2014.1 2.4 [1.6-3.4]\n", + "1665 India 2014.2 4.6 [3.3-6.1]\n", + "1863 India 2013 3.3 [2.6-4.1]\n", + "2061 India 2013.1 2.3 [1.5-3.2]\n", + "2259 India 2013.2 4.4 [3.2-5.7]\n", + "2457 India 2012 3.1 [2.5-3.8]\n", + "2655 India 2012.1 2.1 [1.5-2.9]\n", + "2853 India 2012.2 4.2 [3.1-5.4]\n", + "3051 India 2011 3.0 [2.4-3.6]\n", + "3249 India 2011.1 2.0 [1.4-2.7]\n", + "3447 India 2011.2 4.0 [3.0-5.1]\n", + "3645 India 2010 2.8 [2.3-3.4]\n", + "3843 India 2010.1 1.9 [1.3-2.5]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.country=='India'].head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Correct the format\n", + "\n", + "1. we will drop the first 3 row since its actually headers in the original forms.\n", + "2. correct year value\n", + "3. correct the gender value" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country object\n", + "year object\n", + "value object\n", + "dtype: object" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['2015', '2']" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[1000].year.split('.')" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyearvaluegender
0Afghanistan20165.5 [3.4-8.1]None
1Albania201621.7 [17.0-26.7]None
2Algeria201627.4 [22.5-32.7]None
3Andorra201625.6 [20.1-31.3]None
4Angola20168.2 [5.1-12.2]None
\n", + "
" + ], + "text/plain": [ + " country year value gender\n", + "0 Afghanistan 2016 5.5 [3.4-8.1] None\n", + "1 Albania 2016 21.7 [17.0-26.7] None\n", + "2 Algeria 2016 27.4 [22.5-32.7] None\n", + "3 Andorra 2016 25.6 [20.1-31.3] None\n", + "4 Angola 2016 8.2 [5.1-12.2] None" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['year','gender']] = df.year.str.split('.', expand=True)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([None, '1', '2'], dtype=object)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.gender.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyearvaluegender
0Afghanistan20165.5 [3.4-8.1]Both
1Albania201621.7 [17.0-26.7]Both
2Algeria201627.4 [22.5-32.7]Both
3Andorra201625.6 [20.1-31.3]Both
4Angola20168.2 [5.1-12.2]Both
\n", + "
" + ], + "text/plain": [ + " country year value gender\n", + "0 Afghanistan 2016 5.5 [3.4-8.1] Both\n", + "1 Albania 2016 21.7 [17.0-26.7] Both\n", + "2 Algeria 2016 27.4 [22.5-32.7] Both\n", + "3 Andorra 2016 25.6 [20.1-31.3] Both\n", + "4 Angola 2016 8.2 [5.1-12.2] Both" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['gender'] = df.gender.map({None:'Both', '1': 'Male', '2':'Female'})\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country Angola\n", + "year 2016\n", + "value 8.2 [5.1-12.2]\n", + "gender Both\n", + "Name: 4, dtype: object" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[4]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## correct the BMI value columns\n", + "\n", + "From the webpage of WHO we can know that the values in [] are actually estimation intervel,so we need to seperate them into 3 columns\n", + "\n", + "you can use str.matach() or str.findall() with regular expression to extract float number in this field,but we are gonna use str.split()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012345
05.53.48.1NoneNoneNone
121.717.026.7NoneNoneNone
227.422.532.7NoneNoneNone
325.620.131.3NoneNoneNone
48.25.112.2NoneNoneNone
.....................
2456513.27.120.7NoneNoneNone
245660.20.10.5NoneNoneNone
245674.21.58.9NoneNoneNone
245682.51.05.1NoneNoneNone
245696.73.012.2NoneNoneNone
\n", + "

24570 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5\n", + "0 5.5 3.4 8.1 None None None\n", + "1 21.7 17.0 26.7 None None None\n", + "2 27.4 22.5 32.7 None None None\n", + "3 25.6 20.1 31.3 None None None\n", + "4 8.2 5.1 12.2 None None None\n", + "... ... ... ... ... ... ...\n", + "24565 13.2 7.1 20.7 None None None\n", + "24566 0.2 0.1 0.5 None None None\n", + "24567 4.2 1.5 8.9 None None None\n", + "24568 2.5 1.0 5.1 None None None\n", + "24569 6.7 3.0 12.2 None None None\n", + "\n", + "[24570 rows x 6 columns]" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(df.value.str.findall('\\d+\\.\\d+').tolist()\n", + " #, columns=['BMI','BMI_lower', 'BMI_upper']\n", + " )\n", + "\n", + "#pd.DataFrame(df2.teams.tolist(), index= df2.index)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3 23986\n", + "0 504\n", + "6 80\n", + "Name: value, dtype: int64" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.value.str.findall('\\d+\\.\\d+').apply(len).value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "504 observations have no data\n", + "\n", + "80 observations have two entries" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyearvaluegender
112Monaco2016No dataBoth
149San Marino2016No dataBoth
162South Sudan2016No dataBoth
165Sudan2016No dataBoth
307Monaco2016No dataMale
...............
24487Monaco1975No dataFemale
24524San Marino1975No dataFemale
24537South Sudan1975No dataFemale
24540Sudan1975No dataFemale
24541Sudan (former)19751.7 [0.5-4.1] 1.8 [0.6-4.1]Female
\n", + "

584 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " country year value gender\n", + "112 Monaco 2016 No data Both\n", + "149 San Marino 2016 No data Both\n", + "162 South Sudan 2016 No data Both\n", + "165 Sudan 2016 No data Both\n", + "307 Monaco 2016 No data Male\n", + "... ... ... ... ...\n", + "24487 Monaco 1975 No data Female\n", + "24524 San Marino 1975 No data Female\n", + "24537 South Sudan 1975 No data Female\n", + "24540 Sudan 1975 No data Female\n", + "24541 Sudan (former) 1975 1.7 [0.5-4.1] 1.8 [0.6-4.1] Female\n", + "\n", + "[584 rows x 4 columns]" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.value.str.findall('\\d+\\.\\d+').apply(len).ne(3)]" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyearvaluegenderBMIBMI_lowerBMI_upper
0Afghanistan20165.5 [3.4-8.1]Both5.53.48.1
1Albania201621.7 [17.0-26.7]Both21.717.026.7
2Algeria201627.4 [22.5-32.7]Both27.422.532.7
3Andorra201625.6 [20.1-31.3]Both25.620.131.3
4Angola20168.2 [5.1-12.2]Both8.25.112.2
\n", + "
" + ], + "text/plain": [ + " country year value gender BMI BMI_lower BMI_upper\n", + "0 Afghanistan 2016 5.5 [3.4-8.1] Both 5.5 3.4 8.1\n", + "1 Albania 2016 21.7 [17.0-26.7] Both 21.7 17.0 26.7\n", + "2 Algeria 2016 27.4 [22.5-32.7] Both 27.4 22.5 32.7\n", + "3 Andorra 2016 25.6 [20.1-31.3] Both 25.6 20.1 31.3\n", + "4 Angola 2016 8.2 [5.1-12.2] Both 8.2 5.1 12.2" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['BMI','BMI_lower','BMI_upper']] = pd.DataFrame(df.value.str.findall('\\d+\\.\\d+').tolist()).drop(columns=[3,4,5])\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(24570, 7)" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Check validity of all columns or fields\n", + "We now have much cleaner data than what we started with, but our job is not done yet. We need to go through every column (field) to make sure the data is relatively correct.\n", + "\n", + "**Country columns**\n", + "\n", + "### What we know:\n", + "\n", + "There is a country named 'country', which needs to be fixed\n", + "\n", + "There are Nones in the country column, which need to be fixed\n", + "\n", + "We have\n", + "\n", + "### What we do:\n", + "\n", + "We are going to drop those entries."
+ ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country 0\n", + "year 0\n", + "value 0\n", + "gender 0\n", + "BMI 504\n", + "BMI_lower 504\n", + "BMI_upper 504\n", + "dtype: int64" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyearvaluegenderBMIBMI_lowerBMI_upper
112Monaco2016No dataBothNoneNoneNone
149San Marino2016No dataBothNoneNoneNone
162South Sudan2016No dataBothNoneNoneNone
165Sudan2016No dataBothNoneNoneNone
307Monaco2016No dataMaleNoneNoneNone
........................
24345Sudan1975No dataMaleNoneNoneNone
24487Monaco1975No dataFemaleNoneNoneNone
24524San Marino1975No dataFemaleNoneNoneNone
24537South Sudan1975No dataFemaleNoneNoneNone
24540Sudan1975No dataFemaleNoneNoneNone
\n", + "

504 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " country year value gender BMI BMI_lower BMI_upper\n", + "112 Monaco 2016 No data Both None None None\n", + "149 San Marino 2016 No data Both None None None\n", + "162 South Sudan 2016 No data Both None None None\n", + "165 Sudan 2016 No data Both None None None\n", + "307 Monaco 2016 No data Male None None None\n", + "... ... ... ... ... ... ... ...\n", + "24345 Sudan 1975 No data Male None None None\n", + "24487 Monaco 1975 No data Female None None None\n", + "24524 San Marino 1975 No data Female None None None\n", + "24537 South Sudan 1975 No data Female None None None\n", + "24540 Sudan 1975 No data Female None None None\n", + "\n", + "[504 rows x 7 columns]" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.value=='No data']" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Monaco 126\n", + "San Marino 126\n", + "South Sudan 126\n", + "Sudan 126\n", + "Name: country, dtype: int64" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.value=='No data'].country.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "504" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "126*4" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you forgot to drop the top 3 rows before melting\n", + "```python \n", + "df.dropna(subset=['country'], inplace=True)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## BMI, BMI_lower and BMI_upper columns\n", + "\n", + "### What we know:\n", + "\n", + "1. Four countries have no BMI data (Monaco, Sudan, South Sudan and San Marino), hence they have no estimates.\n", + "2. 
We have 191 countries that do have BMI data and each of them has 126 entries.\n", + "3. The descriptive statistics of BMI data seem OK, no outliers.\n", + "\n", + "### What we do:\n", + "\n", + "1. We will create a new dataframe without those 4 countries to analyze.\n", + "2. We will change the data type of BMI and its estimates to float." + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country 0\n", + "year 0\n", + "value 0\n", + "gender 0\n", + "BMI 504\n", + "BMI_lower 504\n", + "BMI_upper 504\n", + "dtype: int64" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country 0\n", + "year 0\n", + "value 0\n", + "gender 0\n", + "BMI 0\n", + "BMI_lower 0\n", + "BMI_upper 0\n", + "dtype: int64" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dropna(inplace=True)\n", + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(24066, 7)" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country object\n", + "year object\n", + "value object\n", + "gender object\n", + "BMI object\n", + "BMI_lower object\n", + "BMI_upper object\n", + "dtype: object" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country object\n", + "year 
int32\n", + "gender object\n", + "BMI float64\n", + "BMI_lower float64\n", + "BMI_upper float64\n", + "dtype: object" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop(columns=['value'], inplace=True)\n", + "df['year'] = df.year.astype('int')\n", + "df[['BMI','BMI_lower','BMI_upper']] = df[['BMI','BMI_lower','BMI_upper']].astype('float')\n", + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(24066, 6)" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyeargenderBMIBMI_lowerBMI_upper
0Afghanistan2016Both5.53.48.1
1Albania2016Both21.717.026.7
2Algeria2016Both27.422.532.7
3Andorra2016Both25.620.131.3
4Angola2016Both8.25.112.2
\n", + "
" + ], + "text/plain": [ + " country year gender BMI BMI_lower BMI_upper\n", + "0 Afghanistan 2016 Both 5.5 3.4 8.1\n", + "1 Albania 2016 Both 21.7 17.0 26.7\n", + "2 Algeria 2016 Both 27.4 22.5 32.7\n", + "3 Andorra 2016 Both 25.6 20.1 31.3\n", + "4 Angola 2016 Both 8.2 5.1 12.2" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearBMIBMI_lowerBMI_upper
count24066.0000024066.00000024066.00000024066.000000
mean1995.5000012.4489329.23724316.232112
std12.1211710.4074288.85428112.003078
min1975.000000.1000000.0000000.200000
25%1985.000003.9000002.2000006.300000
50%1995.5000010.6000007.00000014.800000
75%2006.0000018.17500013.80000023.000000
max2016.0000063.30000055.60000070.800000
\n", + "
" + ], + "text/plain": [ + " year BMI BMI_lower BMI_upper\n", + "count 24066.00000 24066.000000 24066.000000 24066.000000\n", + "mean 1995.50000 12.448932 9.237243 16.232112\n", + "std 12.12117 10.407428 8.854281 12.003078\n", + "min 1975.00000 0.100000 0.000000 0.200000\n", + "25% 1985.00000 3.900000 2.200000 6.300000\n", + "50% 1995.50000 10.600000 7.000000 14.800000\n", + "75% 2006.00000 18.175000 13.800000 23.000000\n", + "max 2016.00000 63.300000 55.600000 70.800000" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Activity (15 minutes)\n", + "\n", + "## Visualization and EDA\n", + "Before doing any EDA, come up with some questions first. A question-oriented approach is always a good way to explore a data set; otherwise you could easily fall into the rabbit holes you encounter along the way.\n", + "\n", + "What questions could we possibly answer with this data?"
+ ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAKzklEQVR4nO3dUaik9XnH8d9T3VJLbHKxB7RqPL0QGmwxCYuNzY0UWpIoeGOpuUhKbpZICglY6DYX2pKL2ovmIm7JshBJhZC0YBCpa9tchMZAE3J2WZPoUliCwUXbnJiyKlmIK08vdoTD8ZwzM7tzzrj//Xxg2Jl5/+d9n6vvDi/vO1PdHQAuf7+27AEAWAxBBxiEoAMMQtABBiHoAIO4elkH3r9/f6+uri7r8ACXpePHj/+8u1e22ra0oK+urmZtbW1Zhwe4LFXVT7fb5pQLwCAEHWAQgg4wCEEHGISgAwxC0AEGIegAgxB0gEEs7cYi2CtVtSfH8dsCLJtP6Ayvu+d+3PxX/zr338CyCTrAIAQdYBCCDjAIQQcYhKADDELQAQYh6ACDEHSAQQg6wCAEHWAQgg4wiKlBr6qbqurbVXWqqp6rqs9usebOqjpbVScnjwd3Z1wAtjPLty2eT/JAd5+oqmuTHK+qb3X385vWPdPddy9+RABmMfUTene/3N0nJs9fS3IqyQ27PRgA85nrHHpVrSb5QJLvb7H5jqp6tqqerqpbt/n7g1W1VlVr6+vr808LwLZmDnpVvSvJ40k+192vbtp8IsnN3X1bkkeSPLHVPrr7aHcf6O4DKysrFzkyAFuZKehVtS8XYv617v7m5u3d/Wp3vz55fizJvqrav9BJAdjRLFe5VJKvJDnV3V/cZs11k3Wpqtsn+31lkYMCsLNZrnL5cJJPJPlRVZ2cvPf5JO9Nku4+kuTeJPdX1fkk55Lc136TC2BPTQ16d383yY6/stvdh5McXtRQAMzPnaIAgxB0gEEIOsAgBB1gEIIOMAhBBxiEoAMMQtABBiHoAIMQdIBBCDrAIAQdYBCCDjAIQQcYhKADDELQAQYh6ACDEHSAQQg6wCAEHWAQgg4wCEEHGISgAwxC0AEGIegAgxB0gEEIOsAgBB1gEIIOMAhBBxjE1KBX1U1V9e2qOlVVz1XVZ7dYU1X1pao6XVU/rKoP7s64AGzn6hnWnE/yQHefqKprkxyvqm919/Mb1nw0yS2Txx8k+fLkXwD2yNRP6N39cnefmDx/LcmpJDdsWnZPksf6gu8leU9VXb/waQHY1lzn0KtqNckHknx/06Ybkry44fWZvD36AOyimYNeVe9K8niSz3X3q5s3b/EnvcU+DlbVWlWtra+vzzcpADuaKehVtS8XYv617v7mFkvOJLlpw+sbk7y0eVF3H+3uA919YGVl5WLmBWAbs1zlUkm+kuRUd39xm2VPJvnk5GqXDyU5290vL3BOAKaY5SqXDyf5RJIfVdXJyXufT/LeJOnuI0mOJflYktNJfpnkUwufFIAdTQ16d383W58j37imk3xmUUMBMD93igIMQtABBiHoAIMQdIBBCDrAIAQdYBCCDjAIQQcYhKADDELQAQYh6ACDEHSAQQg6wCAEHWAQgg4wCEEHGISgAwxC0AEGIegAgxB0gEEIOsAgBB1gEIIOMAhBBxiEoAMMQtABBiHoAIMQdIBBCDrAIAQdYBCCDjCIqUGvqker6mdV9eNttt9ZVWer6uTk8eDixwRgmqtnWPPV
JIeTPLbDmme6++6FTATARZn6Cb27v5PkF3swCwCXYFHn0O+oqmer6umqunW7RVV1sKrWqmptfX19QYcGIFlM0E8kubm7b0vySJIntlvY3Ue7+0B3H1hZWVnAoQF4yyUHvbtf7e7XJ8+PJdlXVfsveTIA5nLJQa+q66qqJs9vn+zzlUvdLwDzmXqVS1V9PcmdSfZX1ZkkDyXZlyTdfSTJvUnur6rzSc4lua+7e9cmBmBLU4Pe3R+fsv1wLlzWCMASuVMUYBCCDjAIQQcYhKADDGKW73KBd4zb/vY/cvbcG3tyrNVDT+3q/t99zb48+9Cf7OoxuLIIOpeVs+feyAsP37XsMRZit//D4MrjlAvAIAQdYBCCDjAIQQcYhKADDELQAQYh6ACDEHSAQQg6wCAEHWAQgg4wCEEHGISgAwxC0AEGIegAgxB0gEEIOsAgBB1gEIIOMAhBBxiEoAMMQtABBiHoAIMQdIBBCDrAIKYGvaoeraqfVdWPt9leVfWlqjpdVT+sqg8ufkwAppnlE/pXk3xkh+0fTXLL5HEwyZcvfSwA5jU16N39nSS/2GHJPUke6wu+l+Q9VXX9ogYEYDaLOId+Q5IXN7w+M3nvbarqYFWtVdXa+vr6Ag4NwFsWEfTa4r3eamF3H+3uA919YGVlZQGHBuAtiwj6mSQ3bXh9Y5KXFrBfAOawiKA/meSTk6tdPpTkbHe/vID9AjCHq6ctqKqvJ7kzyf6qOpPkoST7kqS7jyQ5luRjSU4n+WWST+3WsABsb2rQu/vjU7Z3ks8sbCIALoo7RQEGIegAgxB0gEEIOsAgBB1gEIIOMAhBBxiEoAMMQtABBiHoAIMQdIBBCDrAIAQdYBCCDjAIQQcYhKADDELQAQYx9ReL4J3k2vcdyu//06Flj7EQ174vSe5a9hgMRNC5rLx26uG88PAYEVw99NSyR2AwTrkADELQAQYh6ACDEHSAQQg6wCAEHWAQgg4wCEEHGISgAwxC0AEGIegAg5gp6FX1kar676o6XVVv+2akqrqzqs5W1cnJ48HFjwrATqZ+OVdVXZXkH5P8cZIzSX5QVU929/Oblj7T3XfvwowAzGCWT+i3Jznd3T/p7l8l+UaSe3Z3LADmNUvQb0jy4obXZybvbXZHVT1bVU9X1a1b7aiqDlbVWlWtra+vX8S4AGxnlqDXFu/1ptcnktzc3bcleSTJE1vtqLuPdveB7j6wsrIy16AA7GyWoJ9JctOG1zcmeWnjgu5+tbtfnzw/lmRfVe1f2JQATDVL0H+Q5Jaq+p2q+vUk9yV5cuOCqrquqmry/PbJfl9Z9LAAbG/qVS7dfb6q/iLJvye5Ksmj3f1cVX16sv1IknuT3F9V55OcS3Jfd28+LQPALprpN0Unp1GObXrvyIbnh5McXuxoAMzDnaIAgxB0gEEIOsAgBB1gEIIOMAhBBxiEoAMMQtABBjHTjUXwTrJ66Kllj7AQ775m37JHYDCCzmXlhYfv2pPjrB56as+OBYvilAvAIAQdYBCCDjAIQQcYhKADDELQAQYh6ACDEHSAQbixiOFNfr98/r/7+/nW+xldlk3QGZ7QcqVwygVgEIIOMAhBBxiEoAMMQtABBiHoAIMQdIBBCDrAIGpZN11U1XqSny7l4DDd/iQ/X/YQsIWbu3tlqw1LCzq8k1XVWncfWPYcMA+nXAAGIegAgxB02NrRZQ8A83IOHWAQPqEDDELQAQYh6FxxqurNqjpZVc9W1Ymq+sPJ+6tV1VX1hQ1r91fVG1V1ePL6b6rqL5c1O+xE0LkSnevu93f3bUn+Osnfbdj2kyR3b3j9p0me28vh4GIJOle630ryfxten0tyqqreuqnoz5L8y55PBRfBb4pyJbqmqk4m+Y0k1yf5o03bv5Hkvqr6nyRvJnkpyW/v6YRwEQSdK9G57n5/klTVHUkeq6rf27D935J8Icn/JvnnvR8PLo5TLlzRuvu/cuGLuFY2vPerJMeTPJDk8SWNBnPzCZ0r
WlX9bpKrkryS5Dc3bPqHJP/Z3a9U1VJmg3kJOleit86hJ0kl+fPufnNjuLv7ubi6hcuMW/8BBuEcOsAgBB1gEIIOMAhBBxiEoAMMQtABBiHoAIP4fxB9kf0zXyMMAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df.loc[(df.gender=='Male') & (df.country=='India'),'BMI'].plot.box()" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df.loc[(df.gender=='Female') & (df.country=='India'),['year','BMI']].plot.line(x='year',y='BMI', title='Indian females avg. BMI over the last 42 years')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## list of top 10 countries having obesity (BMI ≥ 30) issues" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryyeargenderBMIBMI_lowerBMI_upper
0Afghanistan2016Both5.53.48.1
1Albania2016Both21.717.026.7
2Algeria2016Both27.422.532.7
3Andorra2016Both25.620.131.3
4Angola2016Both8.25.112.2
\n", + "
" + ], + "text/plain": [ + " country year gender BMI BMI_lower BMI_upper\n", + "0 Afghanistan 2016 Both 5.5 3.4 8.1\n", + "1 Albania 2016 Both 21.7 17.0 26.7\n", + "2 Algeria 2016 Both 27.4 22.5 32.7\n", + "3 Andorra 2016 Both 25.6 20.1 31.3\n", + "4 Angola 2016 Both 8.2 5.1 12.2" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country\n", + "Nauru 53.042857\n", + "Palau 42.080952\n", + "Cook Islands 41.816667\n", + "Marshall Islands 40.454762\n", + "Tuvalu 35.080952\n", + "Niue 34.307143\n", + "Tonga 33.885714\n", + "Samoa 33.659524\n", + "Micronesia (Federated States of) 31.995238\n", + "Kiribati 31.204762\n", + "Name: BMI, dtype: float64" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_BMI_per_country_over_the_years = df.loc[df.gender=='Both'].groupby('country').BMI.mean()\n", + "avg_BMI_per_country_over_the_years[avg_BMI_per_country_over_the_years.ge(30)].sort_values(ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.5452380952380953" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_BMI_per_country_over_the_years['India']" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([2012, 2013, 2014, 2015, 2016])" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.year.sort_values().unique()[-5:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.year.ge(2012)" + ] + }, + { + "cell_type": "code", + "execution_count": 
117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "country\n", + "Nauru 60.32\n", + "Cook Islands 54.86\n", + "Palau 54.22\n", + "Marshall Islands 51.82\n", + "Tuvalu 50.12\n", + "Niue 48.40\n", + "Tonga 46.80\n", + "Samoa 46.02\n", + "Kiribati 44.72\n", + "Micronesia (Federated States of) 44.38\n", + "Kuwait 36.78\n", + "United States of America 34.92\n", + "Jordan 34.30\n", + "Saudi Arabia 34.08\n", + "Qatar 33.78\n", + "Libya 31.26\n", + "Lebanon 30.84\n", + "Turkey 30.82\n", + "Egypt 30.64\n", + "Bahamas 30.52\n", + "United Arab Emirates 30.34\n", + "Name: BMI, dtype: float64" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_BMI_per_country_over_last5_years = df.loc[(df.gender=='Both') & (df.year.isin(df.year.sort_values().unique()[-5:]))].groupby('country').BMI.mean()\n", + "avg_BMI_per_country_over_last5_years[avg_BMI_per_country_over_last5_years.ge(30)].sort_values(ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.5" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_BMI_per_country_over_last5_years['India']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## BMI Trend of a particular country over the years" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [], + "source": [ + "def bmi_trend(df, some_country = 'New Zealand'): \n", + " \"\"\"\n", + " generating BMI trend plot for a given country\n", + " \"\"\"\n", + " sns.scatterplot(data= df[df.country==some_country] ,\n", + " x='year',\n", + " y='BMI',\n", + " hue='gender'\n", + " )\n", + " plt.title(f'BMI trend of {some_country} from {df.year.min()}-{df.year.max()}')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAEWCAYAAABsY4yMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAABeqUlEQVR4nO2dd3iUVfbHPzczk8yk9x6S0EkooYqAgCKCgoCu2NaC7C67dl3LWtayuquu+lv7qthgLahrWRTsFRCk916SkJDe6yQzk/v7451MMpmhJ5lJcj/P8z6ZufctZ94k3/fOueeeI6SUKBQKhaL74eNpAxQKhULRMSiBVygUim6KEniFQqHopiiBVygUim6KEniFQqHopiiBVygUim6KEnhFhyOEmCyEyD3FY2OEECuEENVCiP/rSLuEEDuFEJNP8VwXCSFyhBA1Qojh7WWjQnE6KIH3IoQQWUKIertIlAshlgshklr1LxJCSCHErDbHPWtvn2d/P08IseoY15FCiL4d9kHalwVACRAspbyjbaf9nvy9PS4kpUyXUv50ioc/DdwkpQyUUm5uD3tOFCGErxDiI/vfj2z7kBJChAohFgshiuzbw636etn/3lpvUghxh71/shCiqU3/tcewZYYQYpUQokIIUSCEeE0IEdSq308I8aYQosre/+c2xy8UQuy1X3Oem/P3FkIssz/wS4QQT57qfesJKIH3Pi6UUgYCcUAh8EKb/n2A4x9MCKEH5gIH28sA+zm9hWRgl/T+FXnJwE53HZ10P1cBVwEFbvqeAfyBFGAMcLUQ4joAKeVh+0Mp0P53NwRoAj5udXxe632klIuPYUcI8HcgHhgEJAJPtep/GOiHdr/OBu4WQkxv1b8VuAHY1PbEQghf4FvgByDWfu53jmFLj0cJvJcipTQDHwFpbbo+B8YLIcLs76cD23D/j+2CEGKF/eVW+2jssmZXhRDiL0KIAuAtIYSPEOIeIcRBIUSpEOJDIUS4/Rwp9lHetUKIw/aR1P2trmGyj6zLhRC7gNHHsWmcEGK9EKLS/nOcvX0R2sPsbrut5x7nPKdll30EfK799RghxBr7SDRfCPGiXWDaXtNPCFED6Oz39GCrc/1FCLENqBVC6IUQs4TmBqoQQvwkhBjU5tp3CSG2CSFqhRBvCM099aV9tPpdq9+5E1LKRinls1LKVYDNzS4XAk9KKeuklFnAG8D8o9zGa4AV9v1OGinle1LKr+zXKgdeA8a3Of+jUspyKeVue/+8Vse/JKX8HjC7Of08tIfNv6SUtVJKs5Ry26nY2VNQAu+lCCH8gcuAX9t0mYHPgMvt768B/nOi55VSTrS/HGYfjX1gfx8LhKONrBYAtwBzgEloo7Fy4KU2p5sADACmAA+2EqyHgD72bRqtvnG4+ZzhwHLgeSAC+BewXAgRIaWcB7yLJk6BUsrvTvBjnrZdaEJ5OxAJnGk/1w1td5JSNthHvqDd0z6tuq8AZgChQG9gCXAbEAV8AXze5qHxG2Aq0B9NlL8E7rPb4IP2OzlVRJvXg4+y3zVA2xF6tBCiUAiRKYR4RggRcBLXnYj9m439ARWPNkpvZiuQfoLnGgtk2R96JfaH5JCTsKXHoQTe+/ifEKICqEL7Z3/KzT7/Aa4RQoSgCfD/2uG6TcBDdsGqB/4I3C+lzJVSNqB9tb6kjbvhb1LKeinlVrR/1GH29kuBf0gpy6SUOWjifTRmAPullG9LKa1SyiXAHjSBO1VO2y4p5UYp5a92m7KAV9Hu9cnwvJQyx34/LwOWSym/lVJa0Hz2JmBcq/1fkFIWSimPACuBtVLKzfb7/ylwqpO3XwH3CCGChDb3Mh/NZeOEEOIsIAbtm2Mze4AMNJfhOcBItIfwcRFCTEV7iD5ob2p+EFa22q0SCOLESEQb2DyP9qBYDix1981KoaEE3vuYI6UMBfyAm4CfhRCxrXewfxWPAv4KLLMLyOlSbH
cLNZMMfGp3J1QAu9FGtTGt9mntFqqj5R84Hshp1Zd9jOvGu+nPBhJO3HQXTtsuIUR/+2RegRCiCngMbSR9MrS+ltPnlFI22ftbf87CVq/r3bwP5NS4xX78fmAp2jcJd1FN1wIfSylrWtlZIKXcJaVsklJmAncDl4D2QBAtE69O8w9CiLHAe8AlUsp99ubm8wa32jUYqD7Bz1EPrJJSfimlbER7SEag+foVblAC76VIKW1Syk/QRHWCm13eAe7gJNwzx7tkm/c5wPlSytBWm9E+ujwe+UBSq/e9jrFvHtrDpDW9gBO5zslyMna9jDZ67SelDEZzlYhj7O+O1vfU6XMKIYTdlo74nM5GaN9YfiuljJVSpqP9369rvY8QwoQ2WX+sCVTQPpOwn3dlq4lXh5tFaGGinwHz7f70ZjvK0X4Hw1qdbxhHmZx2wzZc/04Vx0AJvJciNGYDYWij57Y8j+bCWeGm73gUovmEj8UrwD+EEMl2e6Ls9pwIHwL3CiHChBCJwM3H2PcLoL8Q4kr7RORlaBPLy07wWifDydgVhOYmqxFCDASub4drzxBCTBFCGNAezg3A6tM8L+CY7DXa3/oKIYz2hwhCiD5CiAghhE4IcT7aHEvb0NKLgArgxzbnnSy0UEohtJDdJ9C+BRzNjsFoLqGbpZSfu9nlP8Bf7b+DgcAfgEWtjve1fw4BGOyfo1mn3gHGCiHOFULo0OYzSnD//6FACbw38rk9KqMK+AdwrZTSZYRjH5V9f4rhgw8Di+3ul0uPss9zaKOwb4QQ1WiTvWec4Pn/huaOyAS+Ad4+2o5SylJgJprglaK5AGZKKUtO8FonwwnbBdwJXInmPngN+OAY+x4XKeVetDDGF9BE6UK0kNjG0zlvK/aiuTASgK/tr5u/MYwEtqN9lseB37r5m7oW+I+bv6cRwBqgFu1htINjT/begeY+fOMo7puH0EJ6s4GfgaeklF+16v/Gbvs4YKH99URwuoevoE36zwZmteM97HYI7w8vVigUCsWpoEbwCoVC0U1RAq9QKBTdFCXwCoVC0U1RAq9QKBTdFG9KKkVkZKRMSUnxtBkKhULRZdi4cWOJlDLKXZ9XCXxKSgobNmzwtBkKhULRZRBCHHVFtnLRKBQKRTdFCbxCoVB0U5TAKxQKRTfFq3zw7rBYLOTm5mI2u8v/37MxGo0kJiZiMBg8bYpCofBCvF7gc3NzCQoKIiUlBXvuJAUgpaS0tJTc3FxSU1M9bY5CofBCvF7gzWazEnc3CCGIiIiguLjY06YoFIpTJLc6l92lu6m31dMvtB8DwgfgI9rPc+71Ag8ocT8K6r4oFF2Xw1WHueG7G8iu1qIc9T56Xpv6GqNiR7XbNdQkq0KhUHiAzUWbHeIOYG2y8tKWl6iz1LXbNZTAnybz5s3jo48+Ov6OCoVC0YqSeteSB7k1uZit7RdQ0qEuGiFEFlqRARtglVK233ePLorVakWv7xKeMYVC0YEMixrm0nZJv0sIN4W32zU6YwR/tpQywxvE/dFHH2XgwIFMnTqVK664gqeffpqDBw8yffp0Ro4cyVlnncWePXsAbWR+yy23MG7cOHr37u0YpUspuemmm0hLS2PGjBkUFRU5zr9x40YmTZrEyJEjmTZtGvn5+QBMnjyZ++67j0mTJvHcc891/gdXKBRex+DIwTw18Sli/GMw6ozMS5/HrD6z2vUaPWYouWHDBj7++GM2b96M1WplxIgRjBw5kgULFvDKK6/Qr18/1q5dyw033MAPP/wAQH5+PqtWrWLPnj3MmjWLSy65hE8//ZS9e/eyfft2CgsLSUtLY/78+VgsFm6++WaWLl1KVFQUH3zwAffffz9vvvkmABUVFfz888+evAUKhcKLMOqNTE+dzujY0ViaLESZotD56Nr1Gh0t8BKtpqcEXpVSLmy7gxBiAVoRYHr1OlaR+9Nj1apVzJ49G5PJBMCFF16I2Wxm9erVzJ0717FfQ0OD4/WcOXPw8fEhLS
2NwsJCAFasWMEVV1yBTqcjPj6ec845B4C9e/eyY8cOpk6dCoDNZiMuLs5xrssuu6zDPptCoei6RJgiOuzcHS3w46WUeUKIaOBbIcQeKeWK1jvYRX8hwKhRozqsQKy72rNNTU2EhoayZcsWt8f4+fm5Pd5deKKUkvT0dNasWeP2XAEBASdpsUKhUJweHeqDl1Lm2X8WAZ8CYzryesdiwoQJfP7555jNZmpqali+fDn+/v6kpqby3//+t9letm7deszzTJw4kffffx+bzUZ+fj4//vgjAAMGDKC4uNgh8BaLhZ072xauVygUis6jwwReCBEghAhqfg2cB+zoqOsdj9GjRzNr1iyGDRvGxRdfzKhRowgJCeHdd9/ljTfeYNiwYaSnp7N06dJjnueiiy6iX79+DBkyhOuvv55JkyYB4Ovry0cffcRf/vIXhg0bRkZGBqtXr+6Mj6ZQKBRuEe5cF+1yYiF6o43aQXMFvSel/Mexjhk1apRsW/Bj9+7dDBo0qF1sqqmpITAwkLq6OiZOnMjChQsZMWJEu5zbU7Tn/VEoFF0PIcTGo0UpdpgPXkp5CHAN9PQgCxYsYNeuXZjNZq699touL+4KhcK7kVJyoOIA2VXZBPkG0T+0P2GmsE67fo8JkwR47733PG2CQqHoQazNX8sN39+ApckCwNTkqdw/5n4i/DsucqY1KlWBQqFQdAAV5gr+sfYfDnEH+Db7W3aX7+40G5TAKxQKRQdQY6khu8q1HnapubTTbFACr1AoFB1ApCmSyUmTXdpTglM6zQYl8AqFQtEBGPVGbhtxG6NjRgMQ7BvMYxMeY0DYgE6zoUdNsp4qOp2OIUOGIKVEp9Px4osvMm7cuKPun5WVxerVq7nyyisBWLRoERs2bODFF1/sLJMVCoUX0Du0Ny9MeYGC2gJMehPxgfGden01gj8BTCYTW7ZsYevWrTz++OPce++9x9w/KytLRewoFAoAAgwB9Ant0+niDt1Q4P+3+Qjjn/iB1HuWM/6JH/jf5iPtev6qqirCwrQ4Vikld911F4MHD2bIkCF88MEHANxzzz2sXLmSjIwMnnnmGQDy8vKYPn06/fr14+67725XmxQKheeoaaxhW/E2VuauJLMy023eK0/RrVw0/9t8hHs/2U69xQbAkYp67v1kOwBzhiec8nnr6+vJyMjAbDaTn5/vSCf8ySefOEb2JSUljB49mokTJ/LEE0/w9NNPs2zZMkBz0WzZsoXNmzfj5+fHgAEDuPnmm0lKSjrNT6xQKDxJZUMlr2x9hXd2vwOASW/ihXNe4Iy4MzxsmUa3GsE/9fVeh7g3U2+x8dTXe0/rvM0umj179vDVV19xzTXXIKVk1apVjtTBMTExTJo0ifXr17s9x5QpUwgJCcFoNJKWlkZ2tmv4lEKh6FrsK9/nEHeAems9D69+mLL6Mg9a1UK3Evi8ivqTaj8VzjzzTEpKSiguLj6pr2KtUw/rdDqsVmu72aRQKDxDcV2xS1tuTS5VjVUesMaVbiXw8aGmk2o/Ffbs2YPNZiMiIoKJEyfywQcfYLPZKC4uZsWKFYwZM4agoCCqq6vb7ZoKhcI7SQxKdGkbEjmECGPnpCI4Ht3KB3/XtAFOPngAk0HHXdNOL+602QcP2sTq4sWL0el0XHTRRaxZs4Zhw4YhhODJJ58kNjaWiIgI9Ho9w4YNY968eY5JWYVC0b3oF9qPh898mH+u/yf11npSg1N5YOwDBPkFedo0oAPTBZ8K7ZEu+H+bj/DU13vJq6gnPtTEXdMGnNYEq7ej0gUrFJ5FSsnh6sPUNNYQFxBHuCm8U6/vkXTBnmLO8IRuLegKhcK7EEKQHJzsaTPc0u0EXqFQKNqbkroSsquzMfgYSA1JJcjXO1wwx0MJvEKhUByDgxUHuf3H28msygRgWso07hp1FzEBMR627Ph0qygahUKhaE9sTTaW7F7iEHeAr7O+ZlPRJg9adeIogVcoFIqjUGupZU3+Gpf2XaW7PG
DNyaMEXqFQKI5CgCGAiYkTXdoHRw72gDUnjxL4E0AIwdVXX+14b7VaiYqKYubMmcc87qeffjruPgqFwnvR+ei4tP+lDApvCUWe3Wc2w6OHe9CqE0dNsp4AAQEB7Nixg/r6ekwmE99++y0JCSoUU6HoCaSGpvLK1Fc4XHUYX50vKcEp+Bv8PW3WCdH9RvDbPoRnBsPDodrPbR+2y2nPP/98li9fDsCSJUu44oorHH3r1q1j3LhxDB8+nHHjxrF3r2tys9raWubPn8/o0aMZPnw4S5cubRe7FApFxxNuDCcjOoO0iLQuI+7Q3QR+24fw+S1QmQNI7efnt7SLyF9++eW8//77mM1mtm3bxhlntKQDHThwICtWrGDz5s088sgj3HfffS7H/+Mf/+Ccc85h/fr1/Pjjj9x1113U1taetl0KhaJ9yKnKYWvRVo5Ut28NCU/SvVw03z8CljaZIy31WvvQS0/r1EOHDiUrK4slS5ZwwQUXOPVVVlZy7bXXsn//foQQWCwWl+O/+eYbPvvsM55++mkAzGYzhw8fVmkGFAoP0ySb+PHwj9z/y/3UWmoJ8QvhybOeZFzC0ctydhW6l8BX5p5c+0kya9Ys7rzzTn766SdKS0sd7Q888ABnn302n376KVlZWUyePNnlWCklH3/8MQMGdF7BXYVCcXyyKrO4e8XdNDY1AloRj7tW3MWHMz8kIahrz7V1LxdNiGvqzmO2nyTz58/nwQcfZMiQIU7tlZWVjknXRYsWuT122rRpvPDCC44c8ps3b24XmxQKxelRWFfoEPdmqhqrKKor8pBF7Uf3EvgpD4KhTe53g0lrbwcSExO59dZbXdrvvvtu7r33XsaPH4/NZnNzpDbKt1gsDB06lMGDB/PAAw+0i00KheL0iDRFohfOzgyT3tTpWSE7gm6XLphtH2o+98pcbeQ+5cHT9r97MypdsEJxelhsFj47+BmP/vooNmnD4GPg8bMe57zk8xBCeNq849Kj0gUz9NJuLegKhaJ9MegMzOoziyFRQyiuKyY2IJaU4JQuIe7Ho/sJvEKhUJwkBp2B/mH96R/W39OmtCsdLvBCCB2wATgipVTr9hUKhUew2CzkVOcgkSQEJmDUGz1tUofTGSP4W4HdQHAnXEuhUChcKK4r5vXtr/PB3g9okk1c2PtCbhp+E3GBcZ42rUPp0CgaIUQiMAN4vSOvo1AoFMfi1/xfeW/Pe9ikDYnks0Of8UPOD542q8Pp6DDJZ4G7gaYOvo5CoVAclRW5K1zavsz8EmuT1QPWdB4dJvBCiJlAkZRy43H2WyCE2CCE2FBcXNxR5pwWOp2OjIwMx5aVldVh10pJSaGkpKTDzq9Q9ESGRA5xaRsRPQK9j2fjTKSUZJfWsr+omvpG92toToeO/HTjgVlCiAsAIxAshHhHSnlV652klAuBhaDFwXegPaeMyWRiy5YtnjZDoVCcIpOSJvHJgU84WHEQgLiAOGb1meVRm6rqLfx3Qw5Pf7OPeouN8wfHcs/5A0mOCGi3a3TYCF5Kea+UMlFKmQJcDvzQVtw7guWHlnPeR+cxdPFQzvvoPJYfWt4h19m4cSOTJk1i5MiRTJs2jfz8fAAmT57M7bffzsSJExk0aBDr16/n4osvpl+/fvz1r391HD9nzhxGjhxJeno6CxcudHuNd955hzFjxpCRkcEf//jHo66SVSgUxyY5OJlXp77Kq+e+ystTXmbx9MX0DevrUZu25lTw6PLd1Fu0/+svdxTw7trDNDW13zi3W6UqWH5oOQ+vfpj82nwkkvzafB5e/fBpi3x9fb3DPXPRRRdhsVi4+eab+eijj9i4cSPz58/n/vvvd+zv6+vLihUr+NOf/sTs2bN56aWX2LFjB4sWLXIkKXvzzTfZuHEjGzZs4Pnnn3dKXgbaCtUPPviAX375hS1btqDT6Xj33XdP63MoFD2ZGP8YxiWMY0LiBK+IntmWW+nStnTLEcrqGt3sfWp0igNKSvkT8FNHX+
e5Tc9htpmd2sw2M89teo4ZvWec8nnbumh27NjBjh07mDp1KgA2m424uJY/mFmztK9+Q4YMIT093dHXu3dvcnJyiIiI4Pnnn+fTTz8FICcnh/379xMREeE4x/fff8/GjRsZPXo0oD1koqOjT/kzKBQ9gerGakrrSwnyDSLCFHH8AzxIUrjJpS09LpgA3/aT5W61krWgtuCk2k8VKSXp6emsWeNabR3Az88PAB8fH8fr5vdWq5WffvqJ7777jjVr1uDv78/kyZMxm50fTFJKrr32Wh5//PF2tV2h6K7sLdvLo78+ytbircQHxPPQuIcYGzcWH+GdjoqRyWGMTA5lY3YFAIF+em6Z0g+Tr67druGdn/wUiQ2IPan2U2XAgAEUFxc7BN5isbBz584TPr6yspKwsDD8/f3Zs2cPv/76q8s+U6ZM4aOPPqKoSEtZWlZWRnZ2dvt8AIWim1FhruC+lfextXgrAHm1edz0/U0cqjjkYcuOTkKYPy9fNZL/zB/DwqtH8tlN48noFdau1+hWAn/riFsx6pyXHxt1Rm4d4Zri93Tw9fXlo48+4i9/+QvDhg0jIyOD1atXn/Dx06dPx2q1MnToUB544AHGjh3rsk9aWhp///vfOe+88xg6dChTp051TOQqFApnCmoL2Fexz6nN0qSlJvBmooOMTOwfxXnpsfSOCmz383e7dMHLDy3nuU3PUVBbQGxALLeOuPW0/O/ejkoXrFBAbnUul3x+CbUW5zrH/zn/PwyPHu4hqzQaLDYKqsz4++qICmr//Dc9Kl3wjN4zurWgKxQKVxKDErlvzH3c/0tLNNtl/S+jb6hnQyEzS2r5v2/2snx7PjFBRh6dM5hJ/aPw1XeO86TbCbxCoeiZTEuZRu/Q3hyuPkykMZIB4QMI8g3ymD0NVhvPfLePZds012pBlZkFb2/gfzeMZ1hSaKfY0CUEXkrZLZLvtzfe5F5TKDyNn96PwZGDGRw52NOmAFBU1cCyrXlObVLCweKaThN4r59kNRqNlJaWKjFrg5SS0tJSjMbun9NaoWhNWX0Z+TX5Xp8ozN9XR3yoa6x7iMnQaTZ4/Qg+MTGR3NxcvDURmScxGo0kJiZ62gyFolNosDaw4sgKnlz/JGX1ZcztP5er068mITDB06a5JSLQj0dmp/P7xRtozj4wvk8EgxNCOs0Grxd4g8FAamqqp81QKBQeZmfZTv78058d79/d8y5GvZFbRtzitYuZJvaLYumN4zlYXEOwyUB6fDAxwZ33rdvrBV6hUCgA9pbudWn7ZP8n/HbQb4nyj/KARcdHr/NhSGIoQxJDPXJ973zsKRQKRRtCjaEubXEBcV5RW7Wq3kJ+ZT0Wm3fVNlIjeIVC0SUYEjmEvqF9OVBxAAC90HP7yNs9GgoppWRtZhmPLd/NoZJaZmXEs+Cs3qREtl9O99NBCbxCoegSJAYl8tKUl9hdtptaSy19QvowMHygR23aW1DNNW+so9E+cn9v7WEq6xr516UZ+BnaL2nYqaIEXqFQdBniA+OJD4z3tBkO9hfXOMS9mS92FHDnNDOpXjCKVz54hULhVUgpKakrobax9vg7e5hAN7nbQ0wGjJ2UiuB4eIcVCoVCAeTX5PPvLf9m7rK5/OGbP/Br3q/Ymry3VGV6fDCjU5xT/D44M404NwucPIHXZ5NUKBQ9gybZxHObnuPNHW862vRCzzsXvEN6ZLoHLTs2eRX1bD9SSUl1A/1ighiaGIzR0Hne7x6VTVKhUHRNiuqKeH/P+05tVmllf8V+rxb4+FCT25QE3oBy0SgUik6nwdpAubncKceUr48vYUbXikb+ev/ONO2oVNY1Utvg3flv2qIEXqFQdCo7SnZw5893ctmyy3h207OOqkvhpnDuHHWn074pwSmkRaR5wkwHZbUNvPtrNrNf+oUrXvuVH/cU0WDx3nmB1igfvEKh6DSyKrO4YvkV1FhqHG3TUqbx9/F/x6g30mBtYGfpTnaW7iTMGMawqGEkBS
V50GL4YP1h/vLxdqe2D/84ljGpER6yyJlj+eDVCF6hUHQaBysPOok7wDdZ35BXo+VN99P7MSJmBFenXc3M3jM9Lu41ZgtvrMp0aV+1v8QD1pw8SuAVCkWnYdS55o0x6o0YdJ2XI/1k0Pn4EBHo59Ie4u+d9rZFCbxCoeg0+of1Z0jkEKe2G4bdQGKgd9Y1MPnquOnsvvi0KigXYjIwvm+k54w6CZQPXqFQdCpHqo+wuWgzh6sPMzRqKMOihnk0YdjxsNia2J5bydrMUgL89JyRGs6A2GBPm+VAxcErFAqvISEogYQg76zC5A6DzocRyWGMSHYN4fR2lItGoVB0CA22Biw2i6fNOGGklNQ1dq049+OhRvAKhaJdqWmsYU3eGt7e9Tb+Bn/mpc9jZMxIr51IBThQVM2HG3JZtb+Ec9OiuWh4oldkgzxdlMArFIp2ZU3eGv78c0vt1NV5q1k0fREjYkZ40KqjU1Rt5vp3NrG/SAvf3JVfxa8Hy3jt2pGEmHw9bN3poVw0CoWi3WiwNbB412KnNonkx5wfPWTR8TlUXOsQ92bWZZWRVVLnIYvaDyXwCoXilLHanH3WAkGA3tW1YdJ7RzKupiaJrck5clDfOgaydbvOfXtXosMEXghhFEKsE0JsFULsFEL8raOupVAoOpfsqmwWblvItV9dy8JtC8muygbAV+fLvMHzELSIo5/Oj4mJEz1lKgCNVhurD5Twx7c38rvF6/lpbxH19gnVPlGBTOznHNd+0fAEr6mrejp0WBy8EEIAAVLKGiGEAVgF3Cql/PVox6g4eIXC+yk3l3PT9zexrWSboy0jKoMXznmBUGMojbZGthdv54ecH/DX+zM5abLH0/2uPVTK5a/9Smu5WzRvNJMHRgOQW17H6oOlbMwuZ2xqOGf2iSQ2xHXVrTfikTh4qT05mh1bBvvmPauqFArFKZFdle0k7gBbireQVZVFhjEDX50vI2NHMjJ2pIcsdGXpljzajmUXrcnirP5R6HwEiWH+XDrKn0tHeTb3TXvToT54IYROCLEFKAK+lVKudbPPAiHEBiHEhuLi4o40R6FQtAM+wr1sHK3dG/B1UyPVV+dD1/eyH5sO/Y1IKW1SygwgERgjhBjsZp+FUspRUspRUVFRHWmOQqFoB1KCUzgr4SyntrMSziIlOMUzBp0AFw6Lc5pMFQLmjUvB5ygTrN2FTomDl1JWCCF+AqYDOzrjmgqFomMI9gvm/rH3s/rIatYVrGNM3BjGx48n2M978rO0JSMpjA/+OJZl2/JpsDYxe1g8w3t1vdQDJ0tHTrJGARa7uJuAb4B/SimXHe0YNcmqUCgUJ4enko3FAYuFEDo0V9CHxxJ3hUKhULQvHRlFsw0Y3lHnVygUHUeTbGJ78Xa+OPQFtdZaZvaZyfCo4fjpXYtfeAuFVfWs3F/Kd7sKyOgVxnlpMfSOCvS0WR5F5aJRKBQu7CjZwbyv52Ft0hYDLT24lFfOfYXxCeM9bJl7Gqw2XvrxIP9Zoy24+mpnIR9vzOXt348hNtg7VtF6Au+Na1IoFB7jp5yfHOLezFs73vLa9L+Hy+p459dsp7b9RTXsK6g5yhE9g2OO4IUQfz5Wv5TyX+1rjkKh8AbaijuATdqQ3rpWUbpfRdnkRRXrPMHxXDTeW0dLoVCcFqX1pWwo3MCq3FUMCB/AhIQJpISkAHBOr3NYvGsxTbLJsf816dfgq/Ns+tzd+VV8t7uQvAoz09JjGJUSTqCfnqRwE78ZkchHG3Md+yaGmegf4+USlr8N9n4JNQUwaBYkjQHf9suBo2qyKhQ9EGuTlZe3vszCbQsdbb2De/Pqea8SGxCLtcnKlqItfLD3A2ottVw+8HJGxYzC3+DvMZv3FVYz95U1VNa3uImeuzyD2Rla+b+8inq+3VXIZ1vzGJMazkXDE7xb4At2wFvToaG6pe3StyFt1kmd5pTDJIUQzx+rX0p5y0lZolAovIIjNUd4a8dbTm2Hqg6xv3w/sQ
Gx6H30jIodxajYUUgp0XIHepZtORVO4g7wf9/s46x+UYQH+BIfauLacSlcPTa5a6xQzVnrLO4APz0OqZPAFNIulziei+ZPaCtPPwTyoNunblAoegRNsgmbtLm0u/O9e4O4A1hsrt6GRmsTTW3yu3cJcQewNrhpM0Mrt9jpcjyBjwPmApcBVuAD4GMpZXm7WaBQKDqMgxUH+TXvV/Jq8xgfP56M6Az8Df4kBCYwp88cPjnwiWPfCGMEfcP6etDaYzMkMQQ/vQ8N1hYBvH5yHyKDvDc2H3OVNlI/+COEpUCfyRDZX+vrNRZ0BmgdmTThz+DffikUTtgHL4RIAK4A/gz8RUr5drtZYUf54BWK9iOrMovrvr6OkvoSR9tjEx7jwj4XApBfk8932d+x7NAyhkYN5ZL+lzAgfICnzD0uUko2Ha7grV8yOVJRz1Vjkzl7QBThAV4s8BsWwbJbW96H9IJrP4fwFGhqgty1sOZlbZJ19B+g77knLfCnnapACDECTdynAl8CG0/KAoVC0ensLtvtJO4Az216jnHx44gwRRAXGMfV6Vczd8BcfHW+Xp3uFzRX0cjkMDKSQrE1NeGr13napGNTlQffP+zcVnkYCrZqAu/jA73OhITRmltG3/4RSsebZP0bMBPYDbwP3CuldHXSKRQKr6PR1ujSVmepc/GzG/Vdo3JRMzofgc7Hy8UdoMkGFjeFu9suFtN1XEKB4535AeAQMMy+PWafcBFoRZuGdphlCoXitBgQPgA/nR8NtpbJvOsGX0e0f7QHrTo2dY1WNh+uYF1mKTHBRsb2jui6+WSC42HsDbDqmZY23wCITus0E44n8KmdYoVCoWh3BoQN4PXzXuetnW+RU5XDpQMuZUqvKV4TFeOO73YXcsuSLY73iaFG3v39WJK7YgFsHx2MWQAB0bD5PxA5AMbdDDGdJ/AnvdBJCBEJlMoOWCGlJlkVivbHYrPQ2NRIgMG7RbK0poELX1hFXqXZqf2lK4czY2i8h6xqJxqqQWcEvaHdT32sSdZjzqoIIcYKIX4SQnwihBguhNiBFhdfKISY3u6WKhSKdsegM3i9uIMW015ldp3iM1vaLy7cY/gFdYi4H4/juWheBO4DQoAfgPOllL8KIQYCS4CvOtg+hUJxDBptjewo2cH2ku2E+IaQEZ3hyCfjrRwpr2PT4XJyy+sZHB9CRlIoQSYDsSFGfjchlee+3+/Y11fnw8A4L043AFCeDbnroTIX4jMgYaQm6F7A8QReL6X8BkAI8YiU8lcAKeUeb/bjKRQ9hTV5a7jph5sc7+MC4njtvNdIDk72oFVHp6jazK3vb2ZDdoWj7cGZaVw3PgUhBFeM6UWAn4531x4mKczELVP6kxbnvbVeqcyDD6+F/M0tbRc+ByPnecyk1hxP4Ft/N6pv0+c9WcoUih5IZUMlz2x6xqktvzafHSU7vFbg9+RXO4k7wNPf7OXctBh6hfsTG2JkwcQ+XDYqCT+DDqPBy8MhC7c7izvAtw9Cn3MhNNEzNrXieAI/TAhRhRYWabK/xv6+awXPKhTdjEZbI+Vm16whtZZaD1hzYtQ3uua/qWu00Wh1bg/x92xa4hOm0U2ce0M12NzkmfEAxxR4KaWXPz4Viu5PUV0RO0p2UFhbSO/Q3qRHpBPoG0ikKZIrB17Ji1tedOyrEzoGhQ/yoLVaSoGdeVVsP1KJr96HoQkh9LOn7e0bHUiAr47aVkI/PT2WhFDPpSE+Lk1N2urTvK1gMEHCCIjsp/VFD9TaLK0cHEMvgxDPj95B1WRVKLyacnM5j6x+hJ+P/Oxou2vUXVyVdhU+woc5feeg89Hx/p73iTZFc/OImxkU4VmB33S4nCtfW+tIChbmb2DJgrEMjA2mT3Qg7/z+DJ75dh+7C6qZNSyea85MxuTrxWPJnDXwn9ktK1ADo7V8MlEDIXoQXL0UfvwHlOyFIZfB6PngJcXJlcArFF7M/vL9TuIO8MLmF5iUNInk4GRiAmL4/Z
Dfc3Hfi/HT+3k8HNJqa+L1lZlOGR/L6yz8vLeYgbHaZOnwXmG8evUoahqsRAT4end6X4sZVvzLOb1ATREcWqEJPECvM+CKJdBYC/6RWo4ZL0EJvELhxdRZXX28ZpuZhja5xMNN4Z1l0jGxNElyyl1tzqtwjtEw+eq8e9TejK0RqnJc22sKnN/7BrRrqb32wnseNQpFD2Z/+X6WHVzGF4e+ILMy09GeEpziMiofHTOa+EDvXNlpMui4eqxrBM+UQTEesKYdMAZraXzb0ntyp5tyKqgRvELhYXaU7OB3X//OMVoP9QvljWlv0D+sPykhKSycupBnNz7LnrI9TEmewvzB8wn09d4EXFMGxvDATCuv/HwIk0HHndP6MzK5/YpYdDqDZmmTqGteAN8gOPchSHCbGcDrUEW3FQoPIqXk4dUPO1VWArhx2I38KeNPjvd1ljqqG6uJMEag78D0su1JcbUZnY/w7oIcJ0N1oVaByd873GHNnHbBD4VC0THYpM3JJdNMdnW203t/gz/+Bi8OJXRDVFA3WyoT1PXcTErgFYpOoKaxht1lu8mvzSfGP4aB4QMJ8QtB76PnN/1/w+Zi59WQ5yWf5yFLT4yq+kZ25FWRX2EmPtRIenwwwaYusjjJHXXlULBNq8IU2gtih2j+9y6OEniFooOx2Cws2bOE5zc/72iblz6PGzJuwKQ3MSFhAneMvIPXtr+GwcfAjcNvZFSs9/p4zRYbr63M5IUfDjjabjmnLzee0xc/by+j546GWlj5NKxpWTDGlIfgzJs9kgGyPVE+eIWigzlQfoBLPr8Em3Rejv/+jPdJj0x3vC+sLcRH+BDlH9XZJp4Uu/KqmPHCSlpLh4+A5becxSBvTgx2NI5shtcmO7f56OFPq7SFTF6O8sErFB6k2lLtIu4A1Y3VTu9jArqGj7fabKHtuLBJQlW9xf0B3k5DpWtbk1XLKdPF6TCBF0IkAf8BYtGyUi6UUj7XUddTKDxNfk0+e8v30mBroF9oP3qH9gYgMTCRuIA48mvzHfuG+oWSFJTkKVNPiJyyOnbnV2FtkgyIDaKPvTZqr3B/ooL8KK5uWWwVHeRHUriXTwKXZUHhDpA2iE6HyL5ae1gqmMKgvlXittBe2tbF6TAXjRAiDoiTUm4SQgQBG4E5UspdRztGuWgUXZXsqmxu/v5mMqu0iBh/vT+vT3udIZFDANhVuot/rvsnm4o2kRaRxv1n3M/QKO+tWX+wqIZ5b60jp1xbgRps1PPuH8YyJCEEgG25FTz82U42Ha5gRK9QHp6VztDEUA9afByK9sA7F0PVEe29KQyu/Qxi7b+DnPXwxV1a6t/k8TD9cYgb5jl7TwKPuGiklPlAvv11tRBiN5AAHFXgFYquyrr8dQ5xBy3FwKIdi3jirCcw6AykRaTx0pSXqGioINg3mGA/7/ZV/7y/2CHuAFVmK4tXZ/HP3wxF5yMYmhjK4uvGUF5nIczfQJDJyycj9yxvEXfQRuub3oELntTeJ42Ga5Zq7QERXlOR6XTpFB+8ECIFGA6sddO3AFgA0KtX1/9KpOiZHK4+7NK2v3w/ZqsZg04Tv0DfQK9egdqaA4U1Lm2786totDVh8tEiZYJMXUDYmyl0M67M3wI2KzQvHDOFaFs3osNz0QghAoGPgduklFVt+6WUC6WUo6SUo6KivDt6QNGzsTZZ2VO2h2+yvmFd/joqzBWOvjGxY1z2n913NkFePBK02JrYmVfJ8m15/HqolIq6Rkff2QNc/xcvGZmIyZsrLFkbIW8L7PwUsn8Bc6vJ07QLXffP+G2LuHdTOvTTCSEMaOL+rpTyk+Ptr1B4M78c+YVbf7zVEREzs/dM7h59N2HGMIZFD+PeMffy/ObnabA2MLf/XC5IvcDDFh+b73YXcuO7m2iyT8NdMSaJv0wfSKi/L6NTw7n/gkE8890+LLYmrjkzmemDYz1r8PHYvRQ++QOOEJ8zroez79MWLKVM0GLbVzylTbKecQP0n+
ZZezuBjpxkFcBioExKeduJHKMmWRXeSnFdMZcvv5yiuiKn9tfOe42xcWMd7/Nr8rE2WYkNjMXg473ui7yKemY8v5LyOufQxvcXjGVs7whAy5NzpKIeW5MkIdSEXufFyWfLs+GVCdDQxkkw/2voZf/9SAkVOUAThCSBjxd/GzkJPBUHPx64GtguhNhib7tPSvlFB15ToegQai21LuIOUGYuc3ofFxjXWSadFtVmq4u4A5TVtrhphBAkhnl56GMz5kpXcQeoK215LQSE9ax5vo6MolmFVpxboegyVDRUsL98P9WN1aQEp5AakooQgij/KEbHjGZ94XrHvgJBcpBr7nNvoqy2kX0FVVQ3WOkdGUifaG2SNzbEj2GJIWzNbfFT63wEKRFdRNDbEhyvVVgq3tPSpjNAeKrnbPICvPg7l0LRuZTWl/LY2seY//V8bv3xVi5ddinrCzRBDzAEcO8Z9zIiegQA4cZw/m/S/9E/rL8nTT4mhVVm7vl4G5e/tpY//GcjM19Yxfos7RtHiMmXJ34zlOFJoQBEBfrxylUj6R/jvZPCxyQgEi5+DeIytPdBcXD5Eogc6FGzPI3KRaNQ2Fl1ZBXXf3e9U1vvkN4snr6YUGMooGWFLKwrJNAQ6PWpBb7fXcjvFjv/Pw1LDOGd353hCG+sqrdQUGUm2KgnNsTkCTPbl/oKqC7Qwh2Duoa77HRRuWgUihOgtL7UpS2zMpNaS61D4LtSLHvrVALN7CmoprrB6hD4YJOB4K4Sy34imEK1TQEogVf0QHKrc8mszMSkN9EntA9hRq2cXHKwqz99YuJEIkwRnW3iSZFdUsuhkloCfHX0iwkiLEDLy54a6VoE+rz0GCICu3Le9jLNz95YCxF9e7yP/XgogVf0KHaV7uKP3/6RioYKAM5KOIsHz3yQ2IBYBoYP5G/j/saT65+k1lLLsKhh3D7ydox6761MtDWngqvfXEtVvRWAaekxPDJ7MDHBRgYnhPDo7HQe/3IPdY02xqaGc+uU/l0zZztAVT4svwP2Ltfem8Lgqk8gYYRn7fJilMAregwN1gZe3fqqQ9wBVh5ZybbibcQGxGLUG7m438WMiR1DnbWOOP84r16JWtto5cmv9zjEHeDrnYVcMjKJqWlGAvz0XDU2mYn9o6hvtJEQZiLI2IXdMXmbWsQdtLwx3z8Kl78Dvq7fVhRK4BU9iBpLDdtLtru0Z1c51z9NDErsLJNOi2qzhe25rrnMj1S0JAkTQpAc0U3Er8I13w/5m8FcpQT+KKgwSUW3o8HawJ7SPaw6sopDFYdokk0AhPiFMKXXFJf9B4V7d9Ues8XKjiOV/Ly3iANFNTRHvoX7+zI1zTWSp29U15gEPirlWXDge8hdr4l3M1FuQh77nw/+kZ1mWldDjeAV3Qqz1cx/9/2Xp9Y/hURi8DHw9KSnOafXOeh99Px20G85WHGQ9YXr0fvoWTBkgSNnuzdS16il6X3y671ICX56H169eiSTB0Tjq9dx/eS+ZJXUsfFwOb46H247tx9DE707FfExyd0I7/6mpfjG8Gvg3AchIErztU95CH56HGyNkDgGJtze5eumdiQqDl7RrdhVuovLl12OpOXvOtg3mA9nfkhCUAKglco7UnMEP50fiUGJXp0zZktOOXNeWu3UFhnoy2c3TSA+VItbr6y3kFteh9GgIznc37tzxhyLhmp4Zy7krHFu/+3H0O9c7bXNCuWZYKmH0ORul973VFBx8IoeQ0l9iZO4A1Q1VlHeUO4Q+CDfIAaGd40VjkVVrrHsJTWNlNc1OgQ+xGQgpDsIXX0l5G10ba/Ka3mt00Nkv86zqYujBF7RJSk3l5NZmYkQgpTgFEcse1xAHHqhxypbIkuiTFFEmTxba6CpSZJZUktBZT1RwUZ6RwY4jbRLaho4VFyDjxD0iQ4kzF+LVU8INeEjcKT0BUgMMxEd5NfZH6H9sJihdL+WCCy0F4RrtWvxj4A+U2Dfl877h3l3vh9vRgm8os
uRXZXN/avuZ2vxVgBGRI/g7xP+TlJQEqkhqTx+1uM8tPoh6qx1RBgjeHLikx5NKyCl5OtdBdz2/hYarE0YdIInLh7K7Ix49DofDhXXcOuSLWzP0yJixvWJ4J+/GUJSeAD9YgL5v7nDuO/THdRbbEQH+fHsZRlEBXlvbP4xaaiBda/BD4+AbAK/YLj8PUg9C3xNMOUBbZK1eDf46GHyfRA33NNWd1mUwCu6HN8f/t4h7gCbijbx4+EfuSb9GvQ+eqanTic9Ip3yhnKi/aOJDfBsoYrs0jru+HArDVYtmsdik9zzyTaGJITQPzaIpVvyHOIOsPpgKT/vK+GqsQH46nXMGZ5ARq9QKuosxIUYu3bOmKJd8P3DLe8bqmDpjfC77yAoGmLSYd4yqMjWQh/D+2hZIRWnRBedjVH0ZH458otL2+o854nIpOAkhkYN9bi4g+Z+qWu0ObVZbJKiajONNhs/7yt2OWZtZkteHCEEqZGBDO8V1rXFHZz96c1UZEPrPEABkZAwUguLVOJ+WiiBV3gtOVU5bCjYwKGKQ9iaWgRyctJkl30nJU7qRMvcU2u2si23gg1ZZZTWtEyORgX5EeTn/GXZT+9DTIgRX52OqYOiXc41oW8Xj+2uLYHDa+HIJjBXt7SHJLnuG95HC4NUtDtK4BVeyaojq5i7bC7XfX0dl3x+CcsPLcfSpFUgOjvpbCbET3DsOzFxIhOTJnrKVAAKK+u5/3/bmfXiL1zyyhquemMtB4o0YUuOCOD5K4Y7RN7fV8czl2XQO1JbkDRzaDxn9m5JaHb+4FjO6teFBa9kP7w7F948D147W3PBVOZqfTFpcMHToLMnPAuIgote1kbtinZHxcErvI68mjwuXXYplQ2tqg0JHR9e+KGjwEZ1YzXZVdlaVaXgZI+n8P18ax43L9ns1HbduBT+OjMNnY9W2Cy7tJaiajORAX6kRAaglS3WqKxvJLOkFp3wITXSn8CumjNGSvj+b7DqGef2i16BYVdor21WKDsAdeVaFE1IQufb2Y1QcfCKLkVJfYmTuAPYpI3C2kKHwAf5BjE4crAnzHPLlpwKl7af9hVze4PVkW89OSLgqHlhQky+ZCR14TS+zTTWwv5vXNsPr20ReJ3efdoBRbujBF7RoRTXFZNXk0egbyC9gns5rRo1W81kV2XTYGugV1AvR1GNCGMEwb7BVDW25CHRCR3R/q6+6s6moKqeI+Vmgk16UiNaYtmHJrouNJrYP5IAvy78L9ZYA6WHoMmi+cnbFtIoy4SaQgiMacnL7hsAfadC4U7nfZPO6BSTFc504b8+hbezq3QXt/14G/m1+eiFnptH3MxlAy4jwBBAaX0pC7ctZMmeJUgkaRFpPD7hcXqH9iYhKIEnznqCO36+g3prPQYfAw+OfZDeIb09+nm25lTwx7c3UlBlxqAT3HfBIC4bnYS/r54zUiO4cFgcn2/NB2BATCBXnZHscM90Oary4Lu/wbb3tffJ42HWCxDRR3PD7PsKPlmghTn6BcPFC6H/dBAChl8FB3+Agm3asQNnanHuik5H+eAVHUJNYw3Xf3c9W4q3OLUvnr6YETEj+OHwD9z6461OfXP7z+W+M+5D76NHSsnh6sMU1hYSYYogOTgZvY/nxiOVdY1c8dpaduVXObV/cv04RiRrq2irzRYOFdfSaG0iNTKAyK682nTbB5qAt2bSPXD2vVB6AF45Cyx1LX2+AfDHldoDALQomtID2mKlyP5g7MIJ0LycY/ngVRSNokMobyh3EXeAIzVHANhbttelb0XuCqobtcgTIbTJ0zFxY+gT2sej4g5QUtvoIu4AuRUtIhdkNDAsKZTRqeFdW9wBslzXGrBnGTTWQXW+s7iD5nuvzm95HxAJvcZC4igl7h5ECbzitGiSTWRVZrGjZIdT0eoQ3xAGhrlOpMX4aykD+oT2cekbEzuGQEPHR8NU1jWyPbeCfYXVNFqdFyDZmiQHi2rYllNBWatY9lCTgd6R/i7nig
3u4guPqgu1FL2lBzXXSzOJbgaEfc4GgwkCokHf5gGm99PaFV6FEnjFKWO2mvnv3v9yyeeXcMXyK7j2y2vZXbobgGC/YP469q8E+7aM3ualz2NA+AAAMqIzmJYyzdEXHxDPvMHzMHTwysUDRTXMX7SBC1/8hfOfW8lz3++nvLYRgJoGK4tXZ3LB8yuZ9dIvXLpwDbvto/aIQD+e+M1QRyy7EHDzOX0ZFOu9Jf2Oy5GN8PoUeP0ceGUCbF0CFvtDLXUy9D67Zd+ogTD8au2DR/SFWS+2rDLVGbT3Ea4PbYVnUT54xSmztWgrV315lVPb0MihvDL1FYJ8NeHLrc4lpzqHYN9gUkNS8Te0jIKrGqrIrMykwdZAcnByhycEs9iaeGjpTt5b51z67a15ozl7YDTrMku59NVfnfom9I3k1atHOqJhsktrySmrJ9TfQJ+oAEy+XTROobYU3poOJfuc2xf8BPH25F51ZVp/kxUi+kFQq9+PzaplhKwugKBYrV/XRe9FF0fFwSs6hNyaXJe2bSXbKDOXOQQ+MSjxqDVOg/2CGRY9rENtbE1lnYXvdhe6tO/Kr+LsgdEcLqt36Vt1oISy2kaHwB8rlr1LUVPoKu4A5YdbBN4/XPOju0Onh+hB2qbwWpSLRgFAfm0+u0t3U1znmviq1lLL3rK9ZFZmOtIFAG5zrKcGpzq5ZTzFkfJ6dhyppLja7GgLMuodES+tSY3UBDsm2HViND0+mGBTFx4HNTVp/vX87VpBjWb8wyEoznX/IM+lVVa0P0rgezhSSlbmruTyZZdz6bJLufKLK9lY0FJVJ7sqm7t+votLPr+Eiz+7mFe2vkK5WauXOSB8AFcPutqxr7/enwfPfNBRfMMT2Jok3+4qZOYLK5n5wiou+vdqNmVr9voZdNx8dl+iAluEfGpaNCN6hQKamP/2jF6OviA/PX+blU6IqYuuMG2ohnUL4ZXx8OoEePcSKNqj9QXFwuyXtEnTZibepaXrVXQblA++h5NZmcmln1+K2dYy0o0wRrBk5hJi/GN4buNzvLnzTadjXjj7BSb3mgxoo/tDlYeoNFeSFJxEcrBnq+/sK6hmxgsrsdha/q7jQowsvWk80fYiGbnldRwqrsXkq6NfdCCh/i0CXmO2sK+ohup6CymRXdwdk7kSFs90bkubAxe9CgajFjVTuh/KsiEgAqIGaPHsii6F8sErjkp+Tb6TuAOUmksprC3EX+fPN4dd84psLd7qEPgAQwBDIod0hqknRG5FnZO4A+RXmimoNDsEPjHMn8Qw15BHgECjgRG9PPcNpF0pO+Tatu9LqCuBkEQtIiayv7YpuiUd5qIRQrwphCgSQuzoqGsoThxLk4WsyizNj25r8aOHGcMQOC+nN+qMhPqF4m/wZ2jkUJdz9Q7tnJQBpTUN7CmoorDS7NLXaLVxqLiGQ8U1WOyVkgAiA1396EF+ekJNXTQ7YzOVR7T8LnVlrn0NNVC0W8sN0/obeaAbf3rMEC21gKJH0JE++EXA9A48v+IEKakv4dmNz3LR0ouYs3QOT6x7goLaAgB6h/TmjlF3OPb1ET48MPYBLTGYzsB1g68j3Bju6B8ZM5KR0SM73ObNh8uZ+8pqpj+7kgtfXMWKfcU0uxMLKuv5x/LdTH1mBVOfWcFjX+6msEp7CPSLDuTO81pGpDofweMXD6FXV3W1NNlg75ewcCK8PA7emgF5W1r6Sw/Af+fBv8dqvvZ1CzXfO2jRMGlzWvb1C4Lpj6mVpT2IDvXBCyFSgGVSyhPK66p88B3D5wc/575V9zm1/fWMv3LZwMsAbcHSwYqDFNUVERcYR5+QPk4LjnKrc8mszMRX50vf0L5EmCLoSIqqzMx+6RfyW43cjQYflt98Fn2iA3l/3WHu+WS70zFPXTKUuaO0akF1jVb2F9ZQVG0mKcyfPtGBGHRdNJ6gcCe8OlGLRW8mcgBc96Um1MvvgE2LnY+59nNItRdAqSuD4j1grtJi1SPVYqTuhv
LB93C+P/y9S9vyzOXMHTAXH+GDUW8kPTKddNxHUBwrlr0jyKs0O4k7gNnSRE5ZHX2iA/lsq2tdz+Xb8h0C7++rZ1hSaGeY2vGUZzmLO0DJXi3bo60Rdi11PaZod4vA+4dD8rgON1PhnXh8WCOEWCCE2CCE2FBc7BqDrXAmryaPrMosGqwNLn1VDVUcqjhESV2JU7u7SdDhUcPxER3/669tsHKwuIb8StdFRNAc0VJDQ6ucMCEmPSaDzmXfiEAt2mWUm1j2kW7auhQ2i5Z7vSxTi11vxl0pO1MYGEPALxBi3Hw5DlYVkhQaHhd4KeVCKeUoKeWoqKguXIeyg6m11PLR3o/4zWe/Ydb/ZnH/L/eTU5Xj6N9dupsF3y5g9tLZXPHFFaw+strhsz4n6Rx6BbXEd8f4x3Bhnws73OYDRTVc/85Gpvzfz8x4fhXLtuU5kntV11tYvDqTac+s4Nx//cy9H28np0zLUJgSEcCjcwbTqqIdt53bj77R9hqmw+JJDDU6+hLDTJw/JLbDP0+HUZkH3z4EL43WfOkrnoRa+2AnahCceVPLvsIHZj4DYb00n/q5DzmHNvY+GxJGdK79Cq9F+eC7COvy1/G7b37n1HbVoKu4c9SdVDVWMe+reRyqbAmL8/Xx5cMLP3RkbcyvzedA+QGaZBN9Q/uSENSxo7z6Rhu3LNnMt61SAwgBn94wnoykUFbuK+bqN9c5HXPj5D7cOW0AQggaLDb2FdaQW15HTLCR/rFBBLaqjpRbXse+whoE0D8miISwLpzVcf2bsPx257ZLFsHgi7TX5irNF19bDGEpWnqA1knZivdpaQd8A7WFSoFqoNST8IgPXgixBJgMRAohcoGHpJRvdNT1ujt7yva4tC0/tJz5Q+ZTUlfiJO4AjU2N5FTnOAQ+LiCOuAA3S9M7iKJqs5O4gxbBl1lcQ0ZSKJsOl7sc8+nmI/zurFTCA/zwM+gYkhjCEDel8ODYsexdCpsFtr7n2r53WYvAG4Mh+cyjnyOqv7YpFG3oMIGXUl7RUefu6pTWl1JrqSXSFOmUXRGgwdpAYX0hJp2JKP+WkZi7TIt9QvsQoA/A7GsmwBBAraXWqT/UL7Rd7LXYmsirqMeg8yE+1HWkXFxtpq7BRnSwEZOv5jsP9NOTFG4ip00Cr/AAzY+eFO4qzgNjg/DvqtkZm6nIBWnV/OBtUx/XV0JdqeZD97fPGegMED8Cctc77xvjPYvHFF0Xj/vgexK2JhurjqziyuVXMuPTGdzx8x0crDjo6M+uyuavv/yVmZ/MZO7nc/km6xsabVqu8qGRQxkW1ZJ50agzcsvwW/A3+JMUlMR9Y5zDIH878Lf0De172jYfKa/n78t2MeX/fmb6cyt499dsaszaQimrrYnvdhUy68VfmPT0T9yyZDMHi2oALX/632cPQd+qJunUtBjS47UY7NEp4QxOaInHDvDVcfOUfhjdTK52CeqrYN3r8Mo4eHEUfPMAVLbKtnlkM7x9EbwwHBbNgJy1LX0jrgb/VqGnIb1gwAWdZ7ui26Jy0XQie8v2cvmyy7HKlrC3MbFjeP6c5/H18eVva/7G0oMtYW8CwdsXvO0Q9qK6IvaX76fOWkdqSKqTgDdYG9hfsZ/c6lwiTBEMCBtAcDusWHzphwM89Y1zeb2354/hrP5R7DhSyawXV9HU6k9oysBoXrxyBCZfHbYmyd6CKg6V1BJqMjAwLthppWlBZT2786sxW2z0iwmkb3QXLp6x/1stmVdrpj4C42+FqnytsEbVkZY+Uxgs+BnC7Ll7Sg9B0S7w0UF0ujaJqlCcACoO3kvIrsp2EneAdQXrKKwtxKg38kXmF059EklmZaZD4KP9o4n2d18WzU/vx+DIwQyOPKH57BOisq6R/27McWlfm1nGWf2jOFhc4yTuAN/vKaKwykxKZAA6H0FafAhp8e796LEhJmJDuvDkaGsyV7i2bX4bRsyDisPO4g5QXw7lmS0CH9Fb2xSKdkS5aE6Rms
YajlQfcfF7g1anNL82n6K6Iqd2d3nSm3O++Bv8SQx0XUwU4uteHE+Fgkoz+ZX1uPvWVlVvIbe8jtqGlgeQ0VdHnyjXGqmJ9oiVUH/X/C7RQX74+3ZRN0sztSWaKFsbXfusDVpfbalze3iq675Rg7R0vMZgcFc03Nh+v1uFwh1K4E+BnSU7ueG7Gzj/k/O56fub2FW6y9FXVFfEy1teZvb/ZnPJZ5fw8b6PqWnU/NIDwgZwXvJ5jn0FgvvPuJ/YgFhC/UK5Z8w96EWLEIyKGUVaRNpp21tR18iiXzI579mfmfqvFbzy80FKWhWU3ny4nGvfXMdZT/7IH9/eyO48rTCEn17HTef0dVp01CcygLG9NX9xelwwUwa2fKPwEfD3OYOJDm6JUe9S2Cyw72vNnfL8cFh2m+Y6aaZkP3z6J3g+A96cBgd/aFmUlDoRwlqNwH0DNPeM3hfC+8KUB52vddYdWsoBhaIDUT74kyS/Np8rll1BqbllBBfjH8O7M94lxj+G93a/x+PrHnc65uVzX2ZCwgQAysxl7CndQ1lDGSnBKQwIG+DI+2JrsrGvfB+HKg8R7BvMwPCBTpE0p8qXO/K5/p1NTm3/unQYF49I5HBZHbNeXEVFXUuGyeQIfz760ziigjR/+b7CavYWVGPU+5AWH+IUc15cbWZXXjXldY30iQpgYFxw1837cmSTJu6y1UrSYVfAhc9r6QI+mq+l223GR6/VMI21R7yUH4bC7WA1a3706IEt+zZUQ8EObfQfkqAdo0bwinZA+eDbkdyqXCdxByisKyS3OpcgQxAf7//Y5ZhVR1Y5BD7cGM64BPe5QXQ+OgZFDGJQRPvWuVy2Nd+l7YP1OczJSOBwaa2TuANkl9aRU1bnEPj+MUH0j3E/ARoVZGTSgC46Ym9LyT5ncQfY/iFMvk/L+9Ja3EET/ZL9LQIf1uvok6N+QVos+7Hi2RWKdqaLDrVOnApzBYW1hdiabC59jbZGCmoLqG6sdntsaX2pS43SQN9Al/zpPsKHQEMgvj6+pAa7+mKTApNO4xM4U17bSEGlmaa2s5tAg9VGfkW9I4yxmT7Rrqly+8cE4eMjCDK6+tH1PoJAYxd49lcXaps7Gqq1HOoW11zyNDVpybrq2vjR3Y2og+LB1wS+/u7zwqhRuMKL6bYCb7FZWJGzgqu/vJrZS2fzr43/4khNSyRDZmUmD61+iAs/vZAF3y5gU2GLC6OmsYalB5Zy6bJLufizi1m0YxFlZq3QQmpIKtelX+d0rQVDFpASnIJep+ea9Gsw6VtcGLH+sZwZf/qjtgaLjW93FTDn378w9V8/89Q3e8mraFlEtL+wmjs+3MrZ//cT17653lGHFOCCwXGOBUYAwUY9l43WJnT7RAdy7ZnOZfb+PLW/oxC1V1JXButea6k1uv4NqGu1MjZ3A7wzF14cCZ/8Qcuu2EzlEfjxH/DSGbDwbNj1WctDIG4Y9Gr1uxI+cMFTEBAFwfEw/UlnO/pOdZ/sS6HwErqtD35b8Tau+uIqJC2fb17aPG4fdTv11npu++E2fi341dFn0pt4f8b79A7tzcrcldzw/Q1O53tk3CNc1E9bOl7ZUMmesj3k1+QTHxjPwIiBThEy+8v3s698HwYfAwPDB9Ir+PRjmjdklXHJK2uc2m6d0o/bzu1HldnCdW+tZ9PhCkdfkJ+ez26e4BDqQ8U17MqroglIiwtyijkvr21kZ14l+ZVmksL9GRwfTKCbkb3XsOMT+Mj5Iculb0PaLG1S9LWzwVzR0hc3DK5eqq0eXfEU/PB352Ov+7IlpW5VHuRvhfoKrUZp7JCWFamWBijYptUx9Y+A2GEQ3IWTnCm6BT3SB7+/fL+TuAN8tP8jrkq7iqrGKidxB6i31pNVlUXv0N5u86d/uO9DZvSega/OlxC/EM6IO+Oo1+4X1o9+Yf3a54PY2X6k0qXtvbWHuWpsL4qqGpzEHaC6wUpmcY1D4HtHBdLbTcgjQFiALxP6daEEVZ
vfcW3bskQT+LKDzuIOmmBXZGk+8w1vuR6bu75F4IPjtc0dBj9IGq1tCkUXoMu7aKSUFNcXU93g7EcP8nWdFIz2j8aoM2LSm/DXu+ZCCTBoYpgQ6JppsVdQL3TixOO7S6obKK9zE0cN1JqtFFaZsdqaXPqamiRFVWaq65396GH+vi77xoUYMRp0+Pvp8dO7/ioDvMGP3tQEVQUtZeTaUl8B1UXu+6yNUF0AjXXO7RFuHp4R9lW9fm4mg3W+YAjQYtLd5Up351tXKLoBXVrgC2oLeHnry8z9bC7zv57PL0d+wWqvfpMeke60lN9H+HDnqDsJMYaQEJjAn0f92elckxIm0T9My8g3OWkyYX4tBSSMOiO/HfRbdD7HF/iS6gbe+iWTmS+s4uJ/r2b5tjzqGlsWD23IKuO6xeuZ/uwKHvl8F1klLQulcsvrePqbvZz/3Ep++/paVh0owWafTB3eK9TJL67zEdw9fQBBRgPJ4f7cPc05pnrGkFgGHCXypdMoy9LynL8yDv5zEWSuaikKbWnQao2+OU3zo696VhPzZor3wGc3a/nR/3utcx3SjCuchdwvGIba0wREDYQhc53tOPt+CO+tFciY8oDzoqPQZEga244fWqHwHrqsD15KyYtbXmThtoWONh/hwzvnv8OQKC1s7Uj1EXaW7qSmsYZ+Yf0YGDEQg4/mT62z1LGrdBdZVVlEmiJJi0hzSgOQWZnJrtJdWJusDAwfyIDwE1uU8v76w9zzsXO90Hd/fwbj+0ayv7CaWS/+Qr2lJaLn/PRY/nVZBr56Hx77YhdvrMpy9Ol9BJ/eOJ4hCVqkRnZpLTuOVFLTYGVgbDCDE0LQ2ZN51ZgtbDtSSVZJLTHBRoYkhhAd5MHwRasFvrgTNi1qadP5wh9+hNjBkL0a3jrf+ZjzHoNxN2oTpm9fBPmbW/oCIuH3P7Qs7S/cpfnDQfOxR7cKLa0p1B4IVXmasMcPbyk03WSD/G1QuENbjBSfoe2jUHRRuqUPvri+mA/2fuDU1iSb2Fu+1yHwCUEJRy1s4W/wZ1TsKEbFur0vpIakkhriZvn5MahrtPL2mmyX9h/2FDG+byQHi2ucxB3gq10F3FlRj8lXx7trDzv1WZsk+wqqHQKfHBFAcoT76JZAo4FxfSIZ18dL3A3VebClja/c1qhFtMQOhsO/uh6zfqE2Oi/PchZ30NIHlB1sEfiYNG1zR2AM9J/mvs9HBwnDtU2h6OZ0WReNn48f4X7hLu2BBueJxNoGK2W1rvVLQfN3l9Y0YLa4xsiDVlau8ih+9EarjZKaBizWFj+63kcQG+I6ao4J1hYMmQyuz9MAXz1+Bh/89D6Eu/GzB/h1Ul4XawPUFGvL9d1RV3Z0P3pjnSbArb8N6v20jIlt8bM/oFqnx20mMFY7zuCvCXFbDK7zJgqF4uh0WYEPMYZwx6g7nBYdJQUlkR6ZDoCtSbLmYAnz3lrPrBd/4eWfDjgVfs4ureWxL3Yz4/lV3PjuJrbmVjj66hutfLWjgEtfXcOcf6/mw/WHqWgl9HsLqrnnk+3MeH4l9/9vO/sLNeHz1ev406Q+GHQtNoX5G5jUX4tQGRQfxOgUZ9G75/yBJIb5ExHox19nOo9I+0UHMjihExbSFOyA/90ACyfCF3dBcav0wDXFWsz5a+fAopmw7xvNf95Mzlp4/0p4dSL88A8ot3+DCYqFaY85Xyd2qLaBFm8e1KrClI8Ozr5Pc5uE94EJdzgfm36R5l9XKBQnTJf1wYO2mGlH6Q52l+4m2C+YoZFDHTHn23IquPjl1Vhbrfhsjhuvt9i47f0tfLOrZRVksFHP0hvHkxoVyKr9xVz1hnO90Ocuz2B2RgJFVWYufXUNWaUtkR39YgJZ8vuxRAb50dQk2ZFXybbcSowGH4YlhtKv1WRnXkU9W3IqKKg0MzAuiKGJoY5aow0WG9tyK9mZX0W4v4GMpDB6RXTwqLUyF944zzmdbdxwuPpjbZTdtl6oEDDvCy2ssH
AXvH4OWFpVbRpxLVzwtJZky1Kv5Xcp2Ka5TRJGtrhYQFvmf2QDNNRqvvD44S0j97pyyNuoPWzCUiBhFAS5VrVSKHo63dIHD2DQGRgePZzh0a7+1N0FVU7iDvDW6kyuPCOJynqrk7gDVJmtHCiuITUqkC93FNCWxauzOH9wLJmltU7iDrC/sIas0loig/zw8REMTQxlaGKoW5vjQ01uy94B+Bl0jE4NZ3Sqq+upwyg96JqrPH8zlGVq0SZrX3bukxIyV2oCX7zHWdxB87tPuF1Ln2swQcp4bXNHZD9tc4d/GPQ9V9sUCsUp0WVdNK2pqrfQ0MaP7q62Z6jJF1+dDl+dD0aD60dvTovbuupQMzHBRnyEwKh37xM/qVJz1kYt/vtomCs1n7g7LPVgrnLfJ6V2XutR/OiNtdBQ49xmcPOwET6gN4GPQVum35Zm37q7Y32DXGuRKhQKj9ClBb6oysyi1Vlc9O/V/OE/G1iXWepIwjUkIYTEUOcJz3svGEhYgC+9wv25/VznKvSjksMYEKu5UqamxTjcJgAGnWD+hFT0Oh/6RgXymxHOkTlXjkmi94nmbsnbDJ8s0JbT//TPFp81aGF9q1/U/N0fXK1FmjS70JpskLUKllyupbRd/7oWDthM2SH4/hHtvJ/dCAWtQjUba2HPclh0Ibx1Aez4tGXCNLI/pM1xtvGM6yGij5Zga9LdmuA34x8OqWdpr2OGuOZimfo3CHEtXKJQKDqfLu2D//dPB3jyq5YJQYNO8Mn14xhid48cKq5hQ3Y5JdUNjEoJY2hiqGOkXVnfyNacSrbmVJAc4c/I5DASwlr83bvzq1ifVUajtYkxqeEMjg/Bxx5zXlRtZnN2OXsLqxkYF8yIpDAig1xH/S6U7NfE2dwq7cCwK+HCZ7XR8k+PablSmtH7we+/1/KhHNmo+cqbWpX8O+8fMO4mTaw/+h3s/7qlLyBKOzYsGQ58D+9c7GzL5e/BwBna66p8bbl+0R6IGwKJo1pG7jYr5G3SHjZ+QdBrrHPMeVmWNtFamQOJoyFhhPvVpAqFokPolj74oiozr6/MdGqz2CS78qocAn+s/CshJl8m9o9iYn/3OVgGxQUzKM590eroICPTBscxbXCc2/6jUrzHWdwBtr0PZ/1ZCwH89d/OfdYGKNzZIvCtxR1gzYsw9DJtJN9a3AFqi7X85mHJsM15vQCgfQPofz74+EBwnJbHJW2W6346PSSN0TZ3hKdom0Kh8Dq6rItGrxME+ukJNuqZMSiEMb20UaOxbT1Qq8U1l0lrGmo094c7rI3u84mD5jppqHGO/W6Nxexa01PvZpSvN2qjd53B/ci3+RiDGxeQX7B2nM7gvuanzn6syU3MeUCkJu4KhaLb0mX/w8MD/Pj3nESWjT/IS/X3sijoZZbNMZCRaI8blxJy1sHH87V8JxsWOfusKw7Dyn/BG1Ph89u05evNWBq0epvvXgKLL4Sd/3Ne5FO8D759AN44F757WHO9NFNfpaWzXTQDllwGB39sEfrowa4+68n3aqPswGg492/OfSG9tGX4YHebtFmleu6DYArVltqPu9W5L3lciytlyCXag6QZnQFG/c7tfVUoFN2HLu2Db/rlBXy+/WtLg94PfvcdxA3VYq9fP9c5GmXqozD+Fm10/fmtmnukmYBIu886RZvMXDTD+WKXvQODLtQW/rw9R8tl0kzCSLjyvxAQ4ZqrXAgt33hzIYmyTO38pQc0EU46QxNp0L4R5G6ArBVa1sOUsyCq1WRw0W7IXKG5X3pP1q7bHMlSW6L5wnPWasLea3xL+TgptZS5h37W3Dy9J0H8CDWCVyi6Ad3SB091IT6rn3NuszZoQhY3FPK3u4Ya/vIMDL1U84Nvb+OXri3RBDQsRavy05Y1/9bym5TudxZ30PzjZQe10m5rXnTukxL2fd0i8OGp2uYOv0DoM1nb3BE9yHmCszUBkdqk6cAZrn1C2BcSZbg/VqFQdEu67hDOR+c+N0mzz1
rvmtcFgz8InXasj5tYbZ39GD83E7PGYMCnZR+XYw3aud3kocfX/USvQqFQdCRdV+ADIrXc3q3xj9CWuwPEZbj6rM95EAKjIDQFxt/m3BczGKLtuWAGznT2WQsfOPNGLaIkoh8MaDNKTr8YwvtqD5cJt2kj5mYM/lrtToVCoehkurQPXvNZr9NcIMEJ0G+qswujaDcc+E7Lt9LvPC3UrzlSpbZEy0me+TNEp2s+7Qh7XnAptXzi+78BSx30n65Ncjav0KzMhayVcHgdJJ8JyeMhxL74ydqo+dH3faldq995yjWiUCg6jGP54Lu2wCsUCkUP51gC33VdNAqFQqE4Jh0q8EKI6UKIvUKIA0KIezryWgqFQqFwpsMEXgihA14CzgfSgCuEEEepsaZQKBSK9qYjR/BjgANSykNSykbgfWB2B15PoVAoFK3oSIFPAHJavc+1tzkhhFgghNgghNhQXFzcgeYoFApFz6IjBV64aXMJ2ZFSLpRSjpJSjoqKcp/ZUaFQKBQnT0emKsgFklq9TwTyjnXAxo0bS4QQ2cfap4sRCZR42ogugLpPJ4a6TydOT7pXyUfr6LA4eCGEHtgHTAGOAOuBK6WUOzvkgl6IEGLD0eJTFS2o+3RiqPt04qh7pdFhI3gppVUIcRPwNaAD3uxJ4q5QKBSepkOzSUopvwC+6MhrKBQKhcI9aiVrx7LQ0wZ0EdR9OjHUfTpx1L3Cy3LRKBQKhaL9UCN4hUKh6KYogVcoFIpuihL4k0AI8aYQokgIsaNV2zAhxBohxHYhxOdCiGB7u0EIsdjevlsIcW+rY0ba2w8IIZ4XQrhbFNalOcl75SuEeMvevlUIMbnVMd32XgkhkoQQP9r/PnYKIW61t4cLIb4VQuy3/wxrdcy99nuxVwgxrVV7t71PcPL3SggRYd+/RgjxYptzdet75YSUUm0nuAETgRHAjlZt64FJ9tfzgUftr68E3re/9geygBT7+3XAmWirfb8Ezvf0Z/PwvboReMv+OhrYCPh093sFxAEj7K+D0NaNpAFPAvfY2+8B/ml/nQZsBfyAVOAgoOvu9+kU71UAMAH4E/Bim3N163vVelMj+JNASrkCKGvTPABYYX/9LfCb5t2BAPuCLxPQCFQJIeKAYCnlGqn9tf0HmNPRtnc2J3mv0oDv7ccVARXAqO5+r6SU+VLKTfbX1cButHxNs4HF9t0W0/KZZ6MNGhqklJnAAWBMd79PcPL3SkpZK6VcBZhbn6cn3KvWKIE/fXYAs+yv59KSnuEjoBbIBw4DT0spy9D+KHNbHe82CVs35Wj3aiswWwihF0KkAiPtfT3mXgkhUoDhwFogRkqZD5qwoX2rgaMn8Osx9wlO+F4djR51r5TAnz7zgRuFEBvRvjo22tvHADYgHu3r9B1CiN6cYBK2bsrR7tWbaP9oG4BngdWAlR5yr4QQgcDHwG1Syqpj7eqmTR6jvdtxEvfqqKdw09Yt7xV08ErWnoCUcg9wHoAQoj8ww951JfCVlNICFAkhfgFGASvREq81c9wkbN2Fo90rKaUVuL15PyHEamA/UE43v1dCCAOaYL0rpfzE3lwohIiTUubbXQpF9vajJfDLpZvfJzjpe3U0esS9akaN4E8TIUS0/acP8FfgFXvXYeAcoREAjAX22L9GVgshxtpn768BlnrA9E7naPdKCOFvv0cIIaYCVinlru5+r+yf6Q1gt5TyX626PgOutb++lpbP/BlwuRDCz+7K6ges6+73CU7pXrmlJ9wrJzw9y9uVNmAJmk/dgjYS+B1wK9qM/j7gCVpWBwcC/wV2AruAu1qdZxSaP/og8GLzMd1pO8l7lQLsRZs4+w5I7gn3Ci3KQwLbgC327QIgAm3Seb/9Z3irY+6334u9tIr+6M736TTuVRbaRH+N/W8wrSfcq9abSlWgUCgU3RTlolEoFIpuihJ4hUKh6KYogVcoFIpuihJ4hUKh6KYogVcoFIpuihJ4hUKh6KYogVco2hEhhM
7TNigUzSiBV/RYhBCPNucVt7//hxDiFiHEXUKI9UKIbUKIv7Xq/58QYqM9H/mCVu01QohHhBBr0dLQKhRegRJ4RU/mDezL3O3pEy4HCtFSAIwBMoCRQoiJ9v3nSylHoq2EvEUIEWFvD0DLe3+G1FLUKhRegUo2puixSCmzhBClQojhQAywGRiNlhBts323QDTBX4Em6hfZ25Ps7aVoWUM/7kzbFYoTQQm8oqfzOjAPiEVLWzwFeFxK+WrrnexlBM8FzpRS1gkhfgKM9m6zlNLWSfYqFCeMctEoejqfAtPRRu5f27f59rzjCCES7FkwQ4Byu7gPRMsOqlB4NWoEr+jRSCkbhRA/AhX2Ufg3QohBwBp7LeYa4CrgK+BPQohtaJkcf/WUzQrFiaKySSp6NPbJ1U3AXCnlfk/bo1C0J8pFo+ixCCHS0ApXf6/EXdEdUSN4hUKh6KaoEbxCoVB0U5TAKxQKRTdFCbxCoVB0U5TAKxQKRTdFCbxCoVB0U/4fac/BnlFo7jYAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "bmi_trend(df, some_country='India')" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "bmi_trend(df, some_country='Samoa')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Try [bar_chart_race](https://github.com/dexplo/bar_chart_race)" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting bar_chart_race\n", + " Downloading bar_chart_race-0.1.0-py3-none-any.whl (156 kB)\n", + "Requirement already satisfied: matplotlib>=3.1 in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from bar_chart_race) (3.4.1)\n", + "Requirement already satisfied: pandas>=0.24 in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from bar_chart_race) (1.2.4)\n", + "Requirement already satisfied: pillow>=6.2.0 in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from matplotlib>=3.1->bar_chart_race) (8.1.2)\n", + "Requirement already satisfied: numpy>=1.16 in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from matplotlib>=3.1->bar_chart_race) (1.20.2)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from matplotlib>=3.1->bar_chart_race) (1.3.1)\n", + "Requirement already satisfied: cycler>=0.10 in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from matplotlib>=3.1->bar_chart_race) (0.10.0)\n", + "Requirement already satisfied: pyparsing>=2.2.1 in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from matplotlib>=3.1->bar_chart_race) (2.4.7)\n", + "Requirement already satisfied: python-dateutil>=2.7 in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from matplotlib>=3.1->bar_chart_race) 
(2.8.1)\n", + "Requirement already satisfied: six in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from cycler>=0.10->matplotlib>=3.1->bar_chart_race) (1.15.0)\n", + "Requirement already satisfied: pytz>=2017.3 in c:\\program files\\anaconda3\\envs\\glabs_data_science_learn\\lib\\site-packages (from pandas>=0.24->bar_chart_race) (2021.1)\n", + "Installing collected packages: bar-chart-race\n", + "Successfully installed bar-chart-race-0.1.0\n" + ] + } + ], + "source": [ + "!pip install bar_chart_race" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting package metadata (current_repodata.json): ...working... done\n", + "Solving environment: ...working... done\n", + "\n", + "# All requested packages already installed.\n", + "\n" + ] + } + ], + "source": [ + "!conda install -c conda-forge ffmpeg -y" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [], + "source": [ + "import bar_chart_race as bcr\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countryAfghanistanAlbaniaAlgeriaAndorraAngolaAntigua and BarbudaArgentinaArmeniaAustraliaAustria...United Republic of TanzaniaUnited States of AmericaUruguayUzbekistanVanuatuVenezuela (Bolivarian Republic of)Viet NamYemenZambiaZimbabwe
year
19750.56.56.912.90.85.611.27.310.57.2...0.911.911.44.45.69.60.12.81.53.7
19760.56.77.213.50.95.811.57.610.87.3...0.912.211.74.65.89.90.12.91.63.9
19770.66.87.414.00.96.011.87.911.07.5...1.012.611.94.86.110.30.23.11.74.0
19780.67.07.714.51.06.212.18.111.37.7...1.113.012.25.06.310.70.23.31.94.2
19790.67.28.015.01.16.412.58.411.67.9...1.213.312.55.26.611.00.23.42.04.4
\n", + "

5 rows × 191 columns

\n", + "
" + ], + "text/plain": [ + "country Afghanistan Albania Algeria Andorra Angola Antigua and Barbuda \\\n", + "year \n", + "1975 0.5 6.5 6.9 12.9 0.8 5.6 \n", + "1976 0.5 6.7 7.2 13.5 0.9 5.8 \n", + "1977 0.6 6.8 7.4 14.0 0.9 6.0 \n", + "1978 0.6 7.0 7.7 14.5 1.0 6.2 \n", + "1979 0.6 7.2 8.0 15.0 1.1 6.4 \n", + "\n", + "country Argentina Armenia Australia Austria ... \\\n", + "year ... \n", + "1975 11.2 7.3 10.5 7.2 ... \n", + "1976 11.5 7.6 10.8 7.3 ... \n", + "1977 11.8 7.9 11.0 7.5 ... \n", + "1978 12.1 8.1 11.3 7.7 ... \n", + "1979 12.5 8.4 11.6 7.9 ... \n", + "\n", + "country United Republic of Tanzania United States of America Uruguay \\\n", + "year \n", + "1975 0.9 11.9 11.4 \n", + "1976 0.9 12.2 11.7 \n", + "1977 1.0 12.6 11.9 \n", + "1978 1.1 13.0 12.2 \n", + "1979 1.2 13.3 12.5 \n", + "\n", + "country Uzbekistan Vanuatu Venezuela (Bolivarian Republic of) Viet Nam \\\n", + "year \n", + "1975 4.4 5.6 9.6 0.1 \n", + "1976 4.6 5.8 9.9 0.1 \n", + "1977 4.8 6.1 10.3 0.2 \n", + "1978 5.0 6.3 10.7 0.2 \n", + "1979 5.2 6.6 11.0 0.2 \n", + "\n", + "country Yemen Zambia Zimbabwe \n", + "year \n", + "1975 2.8 1.5 3.7 \n", + "1976 2.9 1.6 3.9 \n", + "1977 3.1 1.7 4.0 \n", + "1978 3.3 1.9 4.2 \n", + "1979 3.4 2.0 4.4 \n", + "\n", + "[5 rows x 191 columns]" + ] + }, + "execution_count": 139, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_wide = df.loc[df.gender=='Both'].pivot(index='year',columns='country',values='BMI')\n", + "df_wide.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bcr.bar_chart_race(\n", + " df=df_wide, \n", + " filename=None, \n", + " sort='desc', \n", + " steps_per_period=1, \n", + " title='BMI by Country', \n", + " n_bars=20,\n", + " figsize=(5,4),\n", + " bar_kwargs={'alpha': .7},\n", + 
") " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Email Classification - Spam or Ham using logistic regression/notebook/Email Classification - Spam or Ham using logistic regression-MK.ipynb b/Email Classification - Spam or Ham using logistic regression/notebook/Email Classification - Spam or Ham using logistic regression-MK.ipynb new file mode 100644 index 0000000..ba81f5d --- /dev/null +++ b/Email Classification - Spam or Ham using logistic regression/notebook/Email Classification - Spam or Ham using logistic regression-MK.ipynb @@ -0,0 +1,1548 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

\n", + " \n", + " \n", + " View in Colab\n", + " \n", + "

" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Resources used in the session:\n", + "\n", + "- [Wiki Confusion Matrix](https://en.wikipedia.org/wiki/Confusion_matrix)\n", + "- [XKCD Machine Learning](https://xkcd.com/1838/)\n", + "- [Tuning Hyper Parameters](https://scikit-learn.org/stable/modules/grid_search.html#exhaustive-grid-search)\n", + "- [Model Specific Cross Validation](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegressionCV.html#sklearn.linear_model.LogisticRegressionCV)\n", + "- [Increase accuracy of Logistic regression](https://stackoverflow.com/questions/38077190/how-to-increase-the-model-accuracy-of-logistic-regression-in-scikit-python)\n", + "- [Order of feature/model selection and parameter tuning](https://stats.stackexchange.com/questions/264533/how-should-feature-selection-and-hyperparameter-optimization-be-ordered-in-the-m)\n", + "- Scikit-Learn Pipeline [[1]](https://scikit-learn.org/stable/tutorial/statistical_inference/putting_together.html) [[2]](https://scikit-learn.org/stable/auto_examples/compose/plot_compare_reduction.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Classifying Email as Spam or Non-Spam\n", + "\n", + "[Source: UCI ML Repo: Spambase Data Set](https://archive.ics.uci.edu/ml/datasets/spambase) \n", + "\n", + "Our collection of spam e-mails came from our postmaster and individuals who had filed spam. Our collection of non-spam e-mails came from filed work and personal e-mails, and hence the word 'george' and the area code '650' are indicators of non-spam. These are useful when constructing a personalized spam filter. One would either have to blind such non-spam indicators or get a very wide collection of non-spam to generate a general purpose spam filter. 
\n", + "\n", + "- Number of Instances: 4601 (1813 Spam = 39.4%)\n", + "- Number of Attributes: 58 (57 continuous, 1 nominal class label)\n", + "\n", + "- Attribute Information:\n", + "\n", + " - The last column of 'spambase.data' denotes whether the e-mail was \n", + " considered spam (1) or not (0)\n", + " \n", + " - 48 attributes are continuous real [0,100] numbers of type `word freq WORD` i.e. percentage of words in the e-mail that match WORD\n", + "\n", + " - 6 attributes are continuous real [0,100] numbers of type `char freq CHAR` i.e. percentage of characters in the e-mail that match CHAR\n", + "\n", + " - 1 attribute is continuous real [1,...] numbers of type `capital run length average` i.e. average length of uninterrupted sequences of capital letters\n", + "\n", + " - 1 attribute is continuous integer \\[1,...\\] numbers of type\n", + "`capital run length longest` i.e. length of longest uninterrupted sequence of capital letters\n", + "\n", + " - 1 attribute is continuous integer \\[1,...\\] numbers of type `capital run length total` i.e.\n", + "sum of length of uninterrupted sequences of capital letters in the email\n", + "\n", + " - 1 attribute is nominal {0,1} class of type spam i.e denotes whether the e-mail was considered spam (1) or not (0), \n", + "\n", + "- Missing Attribute Values: None\n", + "\n", + "- Class Distribution: \n", + "\n", + "\n", + "\n", + "
Spam1813(39.4%)
Non-Spam2788(60.6%)
\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Importing necessary libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_selection import chi2\n", + "from sklearn.feature_selection import f_classif\n", + "from sklearn.feature_selection import SelectKBest\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.metrics import classification_report,confusion_matrix\n", + "from sklearn.pipeline import Pipeline\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## Part A: Base Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### Task 1: Load the data stored in `path` using `.read_csv()` api." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...48495051525354555657
00.000.640.640.00.320.000.000.000.000.00...0.000.0000.00.7780.0000.0003.756612781
10.210.280.500.00.140.280.210.070.000.94...0.000.1320.00.3720.1800.0485.11410110281
20.060.000.710.01.230.190.190.120.640.25...0.010.1430.00.2760.1840.0109.82148522591
30.000.000.000.00.630.000.310.630.310.63...0.000.1370.00.1370.0000.0003.537401911
40.000.000.000.00.630.000.310.630.310.63...0.000.1350.00.1350.0000.0003.537401911
\n", + "

5 rows × 58 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 48 \\\n", + "0 0.00 0.64 0.64 0.0 0.32 0.00 0.00 0.00 0.00 0.00 ... 0.00 \n", + "1 0.21 0.28 0.50 0.0 0.14 0.28 0.21 0.07 0.00 0.94 ... 0.00 \n", + "2 0.06 0.00 0.71 0.0 1.23 0.19 0.19 0.12 0.64 0.25 ... 0.01 \n", + "3 0.00 0.00 0.00 0.0 0.63 0.00 0.31 0.63 0.31 0.63 ... 0.00 \n", + "4 0.00 0.00 0.00 0.0 0.63 0.00 0.31 0.63 0.31 0.63 ... 0.00 \n", + "\n", + " 49 50 51 52 53 54 55 56 57 \n", + "0 0.000 0.0 0.778 0.000 0.000 3.756 61 278 1 \n", + "1 0.132 0.0 0.372 0.180 0.048 5.114 101 1028 1 \n", + "2 0.143 0.0 0.276 0.184 0.010 9.821 485 2259 1 \n", + "3 0.137 0.0 0.137 0.000 0.000 3.537 40 191 1 \n", + "4 0.135 0.0 0.135 0.000 0.000 3.537 40 191 1 \n", + "\n", + "[5 rows x 58 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('../data/email_data.csv', header=None)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: With no headers we won't know which `WORDs` or `CHARs` are used in the dataset. 
However, this does not stop us from using an ML algorithm\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### Task 2: Get an overview of your data by using info() and describe() functions of pandas.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 4601 entries, 0 to 4600\n", + "Data columns (total 58 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 0 4601 non-null float64\n", + " 1 1 4601 non-null float64\n", + " 2 2 4601 non-null float64\n", + " 3 3 4601 non-null float64\n", + " 4 4 4601 non-null float64\n", + " 5 5 4601 non-null float64\n", + " 6 6 4601 non-null float64\n", + " 7 7 4601 non-null float64\n", + " 8 8 4601 non-null float64\n", + " 9 9 4601 non-null float64\n", + " 10 10 4601 non-null float64\n", + " 11 11 4601 non-null float64\n", + " 12 12 4601 non-null float64\n", + " 13 13 4601 non-null float64\n", + " 14 14 4601 non-null float64\n", + " 15 15 4601 non-null float64\n", + " 16 16 4601 non-null float64\n", + " 17 17 4601 non-null float64\n", + " 18 18 4601 non-null float64\n", + " 19 19 4601 non-null float64\n", + " 20 20 4601 non-null float64\n", + " 21 21 4601 non-null float64\n", + " 22 22 4601 non-null float64\n", + " 23 23 4601 non-null float64\n", + " 24 24 4601 non-null float64\n", + " 25 25 4601 non-null float64\n", + " 26 26 4601 non-null float64\n", + " 27 27 4601 non-null float64\n", + " 28 28 4601 non-null float64\n", + " 29 29 4601 non-null float64\n", + " 30 30 4601 non-null float64\n", + " 31 31 4601 non-null float64\n", + " 32 32 4601 non-null float64\n", + " 33 33 4601 non-null float64\n", + " 34 34 4601 non-null float64\n", + " 35 35 4601 non-null float64\n", + " 36 36 4601 non-null float64\n", + " 37 37 4601 non-null float64\n", + " 38 38 4601 non-null float64\n", + " 39 39 4601 non-null float64\n", + 
" 40 40 4601 non-null float64\n", + " 41 41 4601 non-null float64\n", + " 42 42 4601 non-null float64\n", + " 43 43 4601 non-null float64\n", + " 44 44 4601 non-null float64\n", + " 45 45 4601 non-null float64\n", + " 46 46 4601 non-null float64\n", + " 47 47 4601 non-null float64\n", + " 48 48 4601 non-null float64\n", + " 49 49 4601 non-null float64\n", + " 50 50 4601 non-null float64\n", + " 51 51 4601 non-null float64\n", + " 52 52 4601 non-null float64\n", + " 53 53 4601 non-null float64\n", + " 54 54 4601 non-null float64\n", + " 55 55 4601 non-null int64 \n", + " 56 56 4601 non-null int64 \n", + " 57 57 4601 non-null int64 \n", + "dtypes: float64(55), int64(3)\n", + "memory usage: 2.0 MB\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
0123456789...48495051525354555657
count4601.0000004601.0000004601.0000004601.0000004601.0000004601.0000004601.0000004601.0000004601.0000004601.000000...4601.0000004601.0000004601.0000004601.0000004601.0000004601.0000004601.0000004601.0000004601.0000004601.000000
mean0.1045530.2130150.2806560.0654250.3122230.0959010.1142080.1052950.0900670.239413...0.0385750.1390300.0169760.2690710.0758110.0442385.19151552.172789283.2892850.394045
std0.3053581.2905750.5041431.3951510.6725130.2738240.3914410.4010710.2786160.644755...0.2434710.2703550.1093940.8156720.2458820.42934231.729449194.891310606.3478510.488698
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000001.0000001.0000001.0000000.000000
25%0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000001.5880006.00000035.0000000.000000
50%0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0650000.0000000.0000000.0000000.0000002.27600015.00000095.0000000.000000
75%0.0000000.0000000.4200000.0000000.3800000.0000000.0000000.0000000.0000000.160000...0.0000000.1880000.0000000.3150000.0520000.0000003.70600043.000000266.0000001.000000
max4.54000014.2800005.10000042.81000010.0000005.8800007.27000011.1100005.26000018.180000...4.3850009.7520004.08100032.4780006.00300019.8290001102.5000009989.00000015841.0000001.000000
\n", + "

8 rows × 58 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 \\\n", + "count 4601.000000 4601.000000 4601.000000 4601.000000 4601.000000 \n", + "mean 0.104553 0.213015 0.280656 0.065425 0.312223 \n", + "std 0.305358 1.290575 0.504143 1.395151 0.672513 \n", + "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "50% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "75% 0.000000 0.000000 0.420000 0.000000 0.380000 \n", + "max 4.540000 14.280000 5.100000 42.810000 10.000000 \n", + "\n", + " 5 6 7 8 9 ... \\\n", + "count 4601.000000 4601.000000 4601.000000 4601.000000 4601.000000 ... \n", + "mean 0.095901 0.114208 0.105295 0.090067 0.239413 ... \n", + "std 0.273824 0.391441 0.401071 0.278616 0.644755 ... \n", + "min 0.000000 0.000000 0.000000 0.000000 0.000000 ... \n", + "25% 0.000000 0.000000 0.000000 0.000000 0.000000 ... \n", + "50% 0.000000 0.000000 0.000000 0.000000 0.000000 ... \n", + "75% 0.000000 0.000000 0.000000 0.000000 0.160000 ... \n", + "max 5.880000 7.270000 11.110000 5.260000 18.180000 ... 
\n", + "\n", + " 48 49 50 51 52 \\\n", + "count 4601.000000 4601.000000 4601.000000 4601.000000 4601.000000 \n", + "mean 0.038575 0.139030 0.016976 0.269071 0.075811 \n", + "std 0.243471 0.270355 0.109394 0.815672 0.245882 \n", + "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "50% 0.000000 0.065000 0.000000 0.000000 0.000000 \n", + "75% 0.000000 0.188000 0.000000 0.315000 0.052000 \n", + "max 4.385000 9.752000 4.081000 32.478000 6.003000 \n", + "\n", + " 53 54 55 56 57 \n", + "count 4601.000000 4601.000000 4601.000000 4601.000000 4601.000000 \n", + "mean 0.044238 5.191515 52.172789 283.289285 0.394045 \n", + "std 0.429342 31.729449 194.891310 606.347851 0.488698 \n", + "min 0.000000 1.000000 1.000000 1.000000 0.000000 \n", + "25% 0.000000 1.588000 6.000000 35.000000 0.000000 \n", + "50% 0.000000 2.276000 15.000000 95.000000 0.000000 \n", + "75% 0.000000 3.706000 43.000000 266.000000 1.000000 \n", + "max 19.829000 1102.500000 9989.000000 15841.000000 1.000000 \n", + "\n", + "[8 rows x 58 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.info()\n", + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: Data is clean and all features are numeric. Also many word frequencies appear to be zero (looking at their quartile values), indicating these words maybe the ones that help decide spam from not-spam." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "### Task 3.1: Split the data into train and test set and fit the base logistic regression model on train set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(random_state=101)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = df.iloc[:,:-1]\n", + "y = df.iloc[:,-1]\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y , test_size= 0.3, random_state = 42)\n", + "lr = LogisticRegression(random_state=101)\n", + "lr.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 3.2 Compare predicted values and observed values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction for 10 observation: [0 0 0 0 0 1 0 0 0 0]\n", + "Actual values for 10 observation: [0 0 0 1 0 1 0 0 0 0]\n" + ] + } + ], + "source": [ + "# Compare observed value and Predicted value\n", + "print(\"Prediction for 10 observation: \",lr.predict(X_test[0:10]))\n", + "print(\"Actual values for 10 observation: \",y_test[0:10].values)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: Fantastic, 9/10 are correct predictions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 3.3 Find out the accuracy, print out the Classification report and Confusion Matrix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on test data: 0.9210716871832005\n" + ] + } + ], + "source": [ + "print(\"Accuracy on test data:\", lr.score(X_test,y_test))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confusion Matrix: \n", + " [[750 54]\n", + " [ 55 522]]\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "y_pred = lr.predict(X_test)\n", + "print(\"Confusion Matrix: \\n\",confusion_matrix(y_test,y_pred))\n", + "\n", + "## see the plot (plot_confusion_matrix was removed in scikit-learn 1.2; ConfusionMatrixDisplay works on old and new releases)\n", + "from sklearn.metrics import ConfusionMatrixDisplay\n", + "ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred), display_labels=lr.classes_).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classification Report: \n", + " precision recall f1-score support\n", + "\n", + " 0 0.93 0.93 0.93 804\n", + " 1 0.91 0.90 0.91 577\n", + "\n", + " accuracy 0.92 1381\n", + " macro avg 0.92 0.92 0.92 1381\n", + "weighted avg 0.92 0.92 0.92 1381\n", + "\n" + ] + } + ], + "source": [ + "print(\"Classification Report: \\n\",classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: Base model is just the start, but this time it's a pretty good start, let's see if we can improve on it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## Part B: Feature Selection" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "### Task 4: Copy dataset df into df1 variable and apply correlation on df1" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "df1 = df.copy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "### Task 5.1: As we have learned one of the assumptions of Logistic Regression model is that the independent features should not be correlated to each other (i.e. no multicollinearity).\n", + "\n", + "So we have to find the features that have a correlation higher than 0.75 and remove the same so that the assumption for logistic regression model is satisfied. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Columns to be dropped: \n", + "[33, 39]\n" + ] + } + ], + "source": [ + "# Remove correlated features \n", + "## Adapted from \n", + "## https://chrisalbon.com/machine_learning/feature_selection/drop_highly_correlated_features/\n", + "corr_matrix = df1.drop(57, axis=1).corr().abs()\n", + "upper_mask = np.triu(np.ones(corr_matrix.shape),k=1).astype(bool) # builtin bool: the np.bool alias was removed in NumPy 1.24\n", + "upper = corr_matrix.where(upper_mask)\n", + "to_drop = [column for column in upper.columns if any(upper[column] > 0.75)]\n", + "print(\"Columns to be dropped: \")\n", + "print(to_drop)\n", + "df1 = df1.drop(columns=to_drop)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 5.2: Split the new subset of the data acquired by feature selection into train and test set and fit the logistic regression model on train set." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(random_state=101)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = df1.iloc[:,:-1]\n", + "y = df1.iloc[:,-1]\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state = 42)\n", + "lr = LogisticRegression(random_state=101)\n", + "lr.fit(X_train,y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 5.3 Find out the accuracy, print out the Classification report and Confusion Matrix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on test data: 0.9210716871832005\n", + "Confusion Matrix: \n", + " [[746 58]\n", + " [ 51 526]]\n", + "Classification Report: \n", + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.93 0.93 804\n", + " 1 0.90 0.91 0.91 577\n", + "\n", + " accuracy 0.92 1381\n", + " macro avg 0.92 0.92 0.92 1381\n", + "weighted avg 0.92 0.92 0.92 1381\n", + "\n" + ] + } + ], + "source": [ + "print(\"Accuracy on test data:\", lr.score(X_test,y_test))\n", + "y_pred = lr.predict(X_test)\n", + "print(\"Confusion Matrix: \\n\",confusion_matrix(y_test,y_pred))\n", + "print(\"Classification Report: \\n\",classification_report(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: After removing highly correlated features, there is not much change in the score. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "### Task 6.1: Lets apply a feature selection technique (Chi Squared test) to see whether we can increase our accuracy score. \n", + "\n", + "Find the optimum number of features using Chi Square and fit the logistic model on train data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For no of features= 20 , score= 0.9015206372194062\n", + "For no of features= 25 , score= 0.9102099927588704\n", + "For no of features= 30 , score= 0.9116582186821144\n", + "For no of features= 35 , score= 0.9225199131064447\n", + "For no of features= 40 , score= 0.9210716871832005\n", + "For no of features= 50 , score= 0.9232440260680667\n", + "For no of features= 55 , score= 0.9210716871832005\n", + "High Score is: 0.9232440260680667 with features= 50\n" + ] + } + ], + "source": [ + "# let us try selecting different number of features using chi2 test\n", + "nof_list = [20,25,30,35,40,50,55]\n", + "high_score = 0\n", + "nof = 0\n", + "best_chi_model = None\n", + "best_chi_X_train = None\n", + "best_chi_X_test = None\n", + "\n", + "for n in nof_list:\n", + "    test = SelectKBest(score_func=chi2 , k= n )\n", + "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state = 42)\n", + "    X_train = test.fit_transform(X_train,y_train)\n", + "    X_test = test.transform(X_test)\n", + "    \n", + "    chi_model = LogisticRegression(random_state=101)\n", + "    chi_model.fit(X_train,y_train)\n", + "    print(\"For no of features=\",n,\", score=\", chi_model.score(X_test,y_test))\n", + "    if chi_model.score(X_test,y_test)>high_score:\n", + "        high_score = chi_model.score(X_test,y_test)\n", + "        nof = n \n", + "        best_chi_model = chi_model\n", + "        best_chi_X_train = X_train\n", + "        best_chi_X_test = X_test\n", + "print(\"High Score is:\",high_score, \"with features=\",nof)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 6.2 Print out the Confusion Matrix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confusion Matrix: \n", + " [[755 49]\n", + " [ 57 520]]\n" + ] + } + ], + "source": [ + "y_pred = best_chi_model.predict(best_chi_X_test)\n", + "print(\"Confusion Matrix: \\n\",confusion_matrix(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "jupyter": { + "source_hidden": true + }, + "tags": [ + "hide" + ] + }, + "outputs": [ + { + "data": { + "text/markdown": [ + "#### Insight: Using chi squared test there is no or very little change in the score and the optimum features that we got is 50." + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import Markdown as md\n", + "md(\"#### Insight: Using chi squared test there is no or very little change in \\\n", + "the score and the optimum features that we got is {}.\".format(nof))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "### Task 7.1 Now lets see if we can increase our score using another feature selection technique called Anova.\n", + "\n", + "Find the optimum number of features using Anova and fit the logistic model on train data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For no of features= 20 , score= 0.8855901520637219\n", + "For no of features= 25 , score= 0.9015206372194062\n", + "For no of features= 30 , score= 0.9174511223750905\n", + "For no of features= 35 , score= 0.9181752353367125\n", + "For no of features= 40 , score= 0.9160028964518465\n", + "For no of features= 50 , score= 0.9246922519913107\n", + "For no of features= 55 , score= 0.9210716871832005\n", + "High Score is: 0.9246922519913107 with features= 50\n" + ] + } + ], + "source": [ + "# let us try selecting different number of features using anova test\n", + "nof_list = [20,25,30,35,40,50,55]\n", + "high_score = 0\n", + "nof = 0\n", + "best_anova_model = None\n", + "best_anova_X_train = None\n", + "best_anova_X_test = None\n", + "\n", + "for n in nof_list:\n", + "    test = SelectKBest(score_func=f_classif , k= n )\n", + "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state = 42)\n", + "    X_train = test.fit_transform(X_train,y_train)\n", + "    X_test = test.transform(X_test)\n", + "    anova_model = LogisticRegression(random_state=101)\n", + "    anova_model.fit(X_train,y_train)\n", + "    print(\"For no of features=\",n,\", score=\", anova_model.score(X_test,y_test))\n", + "\n", + "    if anova_model.score(X_test,y_test)>high_score:\n", + "        high_score = anova_model.score(X_test,y_test)\n", + "        nof = n \n", + "        best_anova_model = anova_model\n", + "        best_anova_X_train = X_train\n", + "        best_anova_X_test = X_test\n", + "print(\"High Score is:\",high_score, \"with features=\",nof)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 7.2 Print out the Confusion Matrix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confusion Matrix: \n", + " [[754 50]\n", + " [ 54 523]]\n" + ] + } + ], + "source": [ + "y_pred = best_anova_model.predict(best_anova_X_test)\n", + "print(\"Confusion Matrix: \\n\",confusion_matrix(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: The number of selected features still seems to remain the same." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "### Task 8.1: Let us apply PCA as our last feature selection method" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For no of features= 20 , score= 0.9022447501810282\n", + "For no of features= 25 , score= 0.9044170890658942\n", + "For no of features= 30 , score= 0.9058653149891384\n", + "For no of features= 35 , score= 0.9167270094134685\n", + "For no of features= 40 , score= 0.9196234612599565\n", + "For no of features= 50 , score= 0.9167270094134685\n", + "For no of features= 55 , score= 0.9174511223750905\n", + "High Score is: 0.9196234612599565 with features= 40\n" + ] + } + ], + "source": [ + "# Apply PCA and fit the logistic model on the train split (X, y come from df1, i.e. after dropping the correlated columns)\n", + "nof_list = [20,25,30,35,40,50,55]\n", + "high_score = 0\n", + "nof = 0\n", + "best_pca_lr_model = None\n", + "best_pca_lr_X_train = None\n", + "best_pca_lr_X_test = None\n", + "\n", + "for n in nof_list:\n", + "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,random_state = 42)\n", + "    pca = PCA(n_components=n, random_state=101)\n", + "    pca.fit(X_train)\n", + "    X_train = pca.transform(X_train)\n", + "    X_test = pca.transform(X_test)\n", + "    pca_lr_model = LogisticRegression(random_state=101)\n", + "    pca_lr_model.fit(X_train, y_train)\n", + "    print(\"For no of features=\",n,\", score=\", 
pca_lr_model.score(X_test,y_test))\n", + "    \n", + "    if pca_lr_model.score(X_test,y_test)>high_score:\n", + "        high_score = pca_lr_model.score(X_test,y_test)\n", + "        nof = n\n", + "        best_pca_lr_model = pca_lr_model\n", + "        best_pca_lr_X_train = X_train\n", + "        best_pca_lr_X_test = X_test\n", + "print(\"High Score is:\",high_score, \"with features=\",nof)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 8.2 Print out the Confusion Matrix." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confusion Matrix: \n", + " [[759 45]\n", + " [ 66 511]]\n" + ] + } + ], + "source": [ + "y_pred = best_pca_lr_model.predict(best_pca_lr_X_test)\n", + "print(\"Confusion Matrix: \\n\",confusion_matrix(y_test,y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: There is a significant reduction in the number of features selected, but the score is not the best." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## Part C: Hyper-parameter optimisation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "### Task 9: Let us try to optimise the hyper-parameters of the high-scoring model with features selected with PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "GridSearchCV(cv=5, estimator=LogisticRegression(random_state=101),\n", + " param_grid={'penalty': ['l2', 'l1', 'elasticnet'],\n", + " 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag',\n", + " 'saga']})" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "param_grid = {\n", + " 'penalty': ['l2', 'l1', 'elasticnet'],\n", + " 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}\n", + "search = GridSearchCV(best_pca_lr_model, param_grid, cv=5)\n", + "search.fit(best_pca_lr_X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'penalty': 'l1', 'solver': 'liblinear'}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9170807453416149" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.best_score_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Insight: The score did not improve much." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "### Task 10: Does the order matter? 
\n", + "**Method A:** \n", + "Try different feature selection techniques, choose the model with best score and finally optimise its hyper-parameters \n", + "\n", + "**Method B:** \n", + "Perform feature selection and hyper-parameters tuning for each model, then select the best model\n", + "\n", + "So far, we have been trying Method A, let us try Method B, first with Chi-square and Anova, then with all Chi-square, Anova and PCA put together\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'model__penalty': 'l1',\n", + " 'model__solver': 'liblinear',\n", + " 'select__k': 55,\n", + " 'select__score_func': },\n", + " Pipeline(steps=[('select', SelectKBest(k=55)),\n", + " ('model',\n", + " LogisticRegression(penalty='l1', solver='liblinear'))]),\n", + " 0.9134954916678467)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Using Scikit-learn Pipeline method for Chi2 and Anova\n", + "nof_list = [20,25,30,35,40,50,55]\n", + "scoring_func_list = [f_classif, chi2]\n", + "penalty_list = ['l2', 'l1', 'elasticnet']\n", + "solver_list = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']\n", + "\n", + "pipe = Pipeline([\n", + " ('select', SelectKBest()),\n", + " ('model', LogisticRegression())\n", + "])\n", + "\n", + "param_grid = {\n", + " 'select__k': nof_list,\n", + " 'select__score_func': scoring_func_list,\n", + " 'model__penalty': penalty_list,\n", + " 'model__solver': solver_list}\n", + "\n", + "search = GridSearchCV(pipe, param_grid, cv=5)\n", + "best_model = search.fit(X, y)\n", + "\n", + "best_model.best_params_,best_model.best_estimator_,best_model.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'model__penalty': 'l1',\n", + " 'model__solver': 'liblinear',\n", + " 'select': SelectKBest(k=55, score_func=),\n", + " 
'select__k': 55,\n", + " 'select__score_func': },\n", + " Pipeline(steps=[('select',\n", + " SelectKBest(k=55,\n", + " score_func=)),\n", + " ('model',\n", + " LogisticRegression(penalty='l1', solver='liblinear'))]),\n", + " 0.9139300382382098)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Using Scikit-learn Pipeline method for Chi2, Anova and PCA\n", + "nof_list = [20,25,30,35,40,50,55]\n", + "scoring_func_list = [f_classif, chi2]\n", + "penalty_list = ['l2', 'l1', 'elasticnet']\n", + "solver_list = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']\n", + "\n", + "pipe = Pipeline([\n", + " # select stage is populated by the param_grid\n", + " ('select', 'passthrough'),\n", + " ('model', LogisticRegression())\n", + "])\n", + "\n", + "param_grid = [\n", + " {\n", + " 'select':[SelectKBest()],\n", + " 'select__k': nof_list,\n", + " 'select__score_func': scoring_func_list,\n", + " 'model__penalty': penalty_list,\n", + " 'model__solver': solver_list\n", + " },\n", + " {\n", + " 'select':[PCA()],\n", + " 'select__n_components': nof_list,\n", + " 'model__penalty': penalty_list,\n", + " 'model__solver': solver_list\n", + " }\n", + "]\n", + "search = GridSearchCV(pipe, param_grid, cv=5)\n", + "best_model = search.fit(X, y)\n", + "\n", + "best_model.best_params_,best_model.best_estimator_,best_model.best_score_" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Getting_started_with_text_preprocessing/notebook/Getting_started_with_text_preprocessing_MK.ipynb 
b/Getting_started_with_text_preprocessing/notebook/Getting_started_with_text_preprocessing_MK.ipynb new file mode 100644 index 0000000..ed8b7c2 --- /dev/null +++ b/Getting_started_with_text_preprocessing/notebook/Getting_started_with_text_preprocessing_MK.ipynb @@ -0,0 +1,2372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Text Data Preprocessing\n", + "\n", + "In any machine learning task, cleaning or preprocessing the data is as important as model building if not more. And when it comes to unstructured data like text, this process is even more important.\n", + "\n", + "The objective of this notebook is to understand the various text preprocessing steps with code examples.\n", + "\n", + "Some of the common text preprocessing / cleaning steps are:\n", + "\n", + "* Lower casing\n", + "* Removal of Punctuations\n", + "* Removal of Stopwords\n", + "* Removal of Frequent words\n", + "* Removal of Rare words\n", + "* Stemming\n", + "* Lemmatization\n", + "* Removal of emojis\n", + "* Removal of URLs\n", + "\n", + "\n", + "So these are the different types of text preprocessing steps which we can do on text data. But we need not do all of these all the time. We need to carefully choose the preprocessing steps based on our use case since that also plays an important role.\n", + "\n", + "For example, in a sentiment analysis use case, we need not remove the emojis as they convey some important information about the sentiment. Similarly we need to decide based on our use cases." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import re\n", + "import nltk\n", + "import spacy\n", + "import string\n", + "#pd.options.mode.chained_assignment = None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('max_colwidth', 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.0.5'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5000, 1)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('../data/text.csv', lineterminator='\\n')\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
text
0@161252 What's that egg website people talk about
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins
4@VirginAmerica is probably one of the best airlines I've ever experienced.
\n", + "
" + ], + "text/plain": [ + " text\n", + "0 @161252 What's that egg website people talk about\n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq\n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...\n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins\n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced." + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
text
85@235730 and click on \" How to disable Automatic Restart\".\\nShould you need any assistance, DM th...
86@AmericanAir So disappointed in your service. How can you not keep customers and staff updated.
87@811677 Hey, we're unable to tell when an item will be back in stock when it is listed as \"Tempo...
88Woke up....hyped for a Sunday stream......................................................\\n\\nCo...
89@542004 We can take a look for you, Jessi. Please DM your booking reference, full name, home and...
\n", + "
" + ], + "text/plain": [ + " text\n", + "85 @235730 and click on \" How to disable Automatic Restart\".\\nShould you need any assistance, DM th...\n", + "86 @AmericanAir So disappointed in your service. How can you not keep customers and staff updated.\n", + "87 @811677 Hey, we're unable to tell when an item will be back in stock when it is listed as \"Tempo...\n", + "88 Woke up....hyped for a Sunday stream......................................................\\n\\nCo...\n", + "89 @542004 We can take a look for you, Jessi. Please DM your booking reference, full name, home and..." + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[85:90]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lower Casing\n", + "Lower casing is a common text preprocessing technique. The idea is to convert the input text into same casing format so that 'text', 'Text' and 'TEXT' are treated the same way.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lower
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['text_lower'] = df.text.str.lower()\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Removal of Punctuations\n", + "\n", + "One another common text preprocessing technique is to remove the punctuations from the text data. This is again a text standardization process that will help to treat 'hurray' and 'hurray!' in the same way.\n", + "\n", + "We also need to carefully choose the list of punctuations to exclude depending on the use case. For example, the `string.punctuation` in python contains the following punctuation symbols \n", + "```\n", + "!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_{|}~`\n", + "```\n", + "\n", + "We can add or remove more punctuations as per our need." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\\\!\\\\\"\\\\#\\\\$\\\\%\\\\&\\\\\\'\\\\(\\\\)\\\\*\\\\+\\\\,\\\\-\\\\.\\\\/\\\\:\\\\;\\\\<\\\\=\\\\>\\\\?\\\\@\\\\[\\\\\\\\\\\\]\\\\^\\\\_\\\\`\\\\{\\\\|\\\\}\\\\~'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\".join([\"\\\\\"+c for c in string.punctuation])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\\!\\\"\\#\\$\\%\\&\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\`\\{\\|\\}\\~]\n" + ] + } + ], + "source": [ + "print(\"[\" + \"\".join([\"\\\\\"+c for c in string.punctuation]) + \"]\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"text_wo_punct\"] = df.text_lower.str.replace(\"-\",\" \")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_wo_punct
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about@161252 what's that egg website people talk about
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5 20 mins
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_wo_punct \n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5 20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_wo_punct
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about161252 whats that egg website people talk about
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins331912 115955 thats better than having an unstable connection that drops every 5 20 mins
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.virginamerica is probably one of the best airlines ive ever experienced
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_wo_punct \n", + "0 161252 whats that egg website people talk about \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so... \n", + "3 331912 115955 thats better than having an unstable connection that drops every 5 20 mins \n", + "4 virginamerica is probably one of the best airlines ive ever experienced " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"text_wo_punct\"] = df.text_wo_punct.str.replace( \"[\" + \"\".join([\"\\\\\"+c for c in string.punctuation]) + \"]\" , \"\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Removal of stopwords\n", + "Stopwords are commonly occuring words in a language like 'the', 'a' and so on. They can be removed from the text most of the times, as they don't provide valuable information for downstream analysis. 
In cases like Part of Speech tagging, we should not remove them as they provide very valuable information about the POS.\n", + "\n", + "These stopword lists are already compiled for different languages and we can safely use them. For example, the stopword list for the English language from the nltk package can be seen below." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package stopwords to\n", + "[nltk_data] C:\\Users\\kukre\\AppData\\Roaming\\nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import nltk\n", + "nltk.download('stopwords')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"i, me, my, myself, we, our, ours, ourselves, you, you're, you've, you'll, you'd, your, yours, yourself, yourselves, he, him, his, himself, she, she's, her, hers, herself, it, it's, its, itself, they, them, their, theirs, themselves, what, which, who, whom, this, that, that'll, these, those, am, is, are, was, were, be, been, being, have, has, had, having, do, does, did, doing, a, an, the, and, but, if, or, because, as, until, while, of, at, by, for, with, about, against, between, into, through, during, before, after, above, below, to, from, up, down, in, out, on, off, over, under, again, further, then, once, here, there, when, where, why, how, all, any, both, each, few, more, most, other, some, such, no, nor, not, only, own, same, so, than, too, very, s, t, can, will, just, don, don't, should, should've, now, d, ll, m, o, re, ve, y, ain, aren, aren't, couldn, couldn't, didn, didn't, doesn, doesn't, hadn, hadn't, hasn, hasn't, haven, haven't, isn, isn't, ma, mightn, mightn't, mustn, 
mustn't, needn, needn't, shan, shan't, shouldn, shouldn't, wasn, wasn't, weren, weren't, won, won't, wouldn, wouldn't\"" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from nltk.corpus import stopwords\n", + "\", \".join(stopwords.words('english'))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'161252 whats that egg website people talk about'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.text_wo_punct.iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'161252 whats egg website people talk'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = df.text_wo_punct.iloc[0]\n", + "\n", + "\" \".join([word for word in text.split() if word not in stopwords.words('english')])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "def remove_words(text, wordlist):\n", + " \"\"\" custome function to remove words from text present in the wordlist\"\"\"\n", + " return \" \".join([word for word in text.split() if word not in wordlist])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_wo_puncttext_wo_stop
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about161252 whats that egg website people talk about161252 whats egg website people talk
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so...693975 assist recommend updating ios 1111 havent chance also dm us following link futher support...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins331912 115955 thats better than having an unstable connection that drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.virginamerica is probably one of the best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experienced
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_wo_punct \\\n", + "0 161252 whats that egg website people talk about \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so... \n", + "3 331912 115955 thats better than having an unstable connection that drops every 5 20 mins \n", + "4 virginamerica is probably one of the best airlines ive ever experienced \n", + "\n", + " text_wo_stop \n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also dm us following link futher support... 
\n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['text_wo_stop'] = df.text_wo_punct.apply(lambda text: remove_words(text, stopwords.words('english')))\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Removal of Frequent words\n", + "In the previous preprocessing step, we removed the stopwords based on language information. But say, if we have a domain specific corpus, we might also have some frequent words which are of not so much importance to us.\n", + "\n", + "So this step is to remove the frequent words in the given corpus. If we use something like tfidf, this is automatically taken care of.\n", + "\n", + "Let us get the most common words and then remove them in the next step" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"i, me, my, myself, we, our, ours, ourselves, you, you're, you've, you'll, you'd, your, yours, yourself, yourselves, he, him, his, himself, she, she's, her, hers, herself, it, it's, its, itself, they, them, their, theirs, themselves, which, who, whom, this, that, that'll, these, those, am, is, are, was, were, be, been, being, have, has, had, having, do, does, did, doing, a, an, the, and, but, if, or, because, as, until, while, of, at, by, for, with, about, against, between, into, through, during, before, after, above, below, to, from, up, down, in, out, on, off, over, under, again, further, then, once, here, there, where, why, how, all, any, both, each, few, more, most, other, some, such, no, nor, not, only, own, same, so, than, too, very, s, t, can, will, just, don, don't, should, should've, now, d, ll, m, o, re, ve, y, ain, aren, aren't, couldn, couldn't, didn, didn't, doesn, doesn't, hadn, 
hadn't, hasn, hasn't, haven, haven't, isn, isn't, ma, mightn, mightn't, mustn, mustn't, needn, needn't, shan, shan't, shouldn, shouldn't, wasn, wasn't, weren, weren't, won, won't, wouldn, wouldn't\"" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_word_list = stopwords.words('english')\n", + "stop_word_list.remove('when')\n", + "stop_word_list.remove('what')\n", + "\n", + "\", \".join(stop_word_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('us', 836),\n", + " ('please', 747),\n", + " ('dm', 633),\n", + " ('help', 460),\n", + " ('thanks', 405),\n", + " ('hi', 404),\n", + " ('get', 352),\n", + " ('sorry', 314),\n", + " ('like', 281),\n", + " ('send', 276)]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from collections import Counter\n", + "cnt = Counter()\n", + "\n", + "for text in df.text_wo_stop.values:\n", + " for word in text.split():\n", + " cnt[word] += 1\n", + " \n", + "cnt.most_common(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['us', 'please', 'dm', 'help', 'thanks', 'hi', 'get', 'sorry', 'like', 'send']" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[w for w,_ in cnt.most_common(10)]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_wo_puncttext_wo_stoptext_wo_stopfreq
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about161252 whats that egg website people talk about161252 whats egg website people talk161252 whats egg website people talk
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so...693975 assist recommend updating ios 1111 havent chance also dm us following link futher support...693975 assist recommend updating ios 1111 havent chance also following link futher support https...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins331912 115955 thats better than having an unstable connection that drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.virginamerica is probably one of the best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experienced
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_wo_punct \\\n", + "0 161252 whats that egg website people talk about \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so... \n", + "3 331912 115955 thats better than having an unstable connection that drops every 5 20 mins \n", + "4 virginamerica is probably one of the best airlines ive ever experienced \n", + "\n", + " text_wo_stop \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also dm us following link futher support... \n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_wo_stopfreq \n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also following link futher support https... 
\n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['text_wo_stopfreq'] = df.text_wo_stop.apply(lambda text: remove_words(text, [w for w,_ in cnt.most_common(10)] ))\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Removal of Rare words\n", + "This is very similar to the previous preprocessing step, but here we remove the rare words from the corpus instead." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('custom', 1),\n", + " ('puma', 1),\n", + " ('inr1400', 1),\n", + " ('170844', 1),\n", + " ('bricked', 1),\n", + " ('implementing', 1),\n", + " ('407091', 1),\n", + " ('reunion', 1),\n", + " ('gravity', 1),\n", + " ('319396', 1),\n", + " ('684726', 1),\n", + " ('hotmail', 1),\n", + " ('sean', 1),\n", + " ('457844', 1),\n", + " ('703576', 1),\n", + " ('598743', 1),\n", + " ('hk', 1),\n", + " ('313942', 1),\n", + " ('httpstcobqcl3gv57t', 1)]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnt.most_common()[:-20:-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_wo_puncttext_wo_stoptext_wo_stopfreqtext_wo_stopfreqrare
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about161252 whats that egg website people talk about161252 whats egg website people talk161252 whats egg website people talk161252 whats egg website people talk
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so...693975 assist recommend updating ios 1111 havent chance also dm us following link futher support...693975 assist recommend updating ios 1111 havent chance also following link futher support https...693975 assist recommend updating ios 1111 havent chance also following link futher support https...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins331912 115955 thats better than having an unstable connection that drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.virginamerica is probably one of the best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experienced
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_wo_punct \\\n", + "0 161252 whats that egg website people talk about \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so... \n", + "3 331912 115955 thats better than having an unstable connection that drops every 5 20 mins \n", + "4 virginamerica is probably one of the best airlines ive ever experienced \n", + "\n", + " text_wo_stop \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also dm us following link futher support... \n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_wo_stopfreq \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also following link futher support https... 
\n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_wo_stopfreqrare \n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also following link futher support https... \n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['text_wo_stopfreqrare'] = df.text_wo_stopfreq.apply(lambda text: remove_words(text, [w for w,_ in cnt.most_common()[:-10:-1]] ))\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
wordcount
42301612521
4231why🤷🏻‍♀️1
4232httpstcobxrvfeixxq1
42336939751
4234futher1
.........
13472bricked1
134731708441
13474inr14001
13475puma1
13476custom1
\n", + "

9247 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " word count\n", + "4230 161252 1\n", + "4231 why🤷🏻‍♀️ 1\n", + "4232 httpstcobxrvfeixxq 1\n", + "4233 693975 1\n", + "4234 futher 1\n", + "... ... ...\n", + "13472 bricked 1\n", + "13473 170844 1\n", + "13474 inr1400 1\n", + "13475 puma 1\n", + "13476 custom 1\n", + "\n", + "[9247 rows x 2 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(cnt.most_common(), columns=['word','count']).query('count == 1')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stemming\n", + "Stemming is the process of reducing inflected (or sometimes derived) words to their word stem, base or root form (From Wikipedia)\n", + "\n", + "For example, if there are two words in the corpus walks and walking, then stemming will stem the suffix to make them walk. But say in another example, we have two words console and consoling, the stemmer will remove the suffix and make them consol which is not a proper english word.\n", + "\n", + "There are several type of stemming algorithms available and one of the famous one is porter stemmer which is widely used. We can use nltk package for the same." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_stemmed
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about@161252 what' that egg websit peopl talk about
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 that better than have an unstabl connect that drop everi 5-20 min
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.@virginamerica is probabl one of the best airlin i'v ever experienced.
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_stemmed \n", + "0 @161252 what' that egg websit peopl talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so... \n", + "3 @331912 @115955 that better than have an unstabl connect that drop everi 5-20 min \n", + "4 @virginamerica is probabl one of the best airlin i'v ever experienced. 
" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from nltk.stem.porter import PorterStemmer\n", + "\n", + "\n", + "stemmer = PorterStemmer()\n", + "def stem_words(text):\n", + " return \" \".join([stemmer.stem(word) for word in text.split()])\n", + "\n", + "df['text_stemmed'] = df.text_lower.apply(stem_words)\n", + "df.head()[['text','text_lower','text_stemmed']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "'ppl' -> # normailisation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lemmatization\n", + "Lemmatization is similar to stemming in reducing inflected words to their word stem but differs in the way that it makes sure the root word (also called as lemma) belongs to the language.\n", + "\n", + "As a result, this one is generally slower than stemming process. So depending on the speed requirement, we can choose to use either stemming or lemmatization.\n", + "\n", + "Let us use the WordNetLemmatizer in nltk to lemmatize our sentences" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package wordnet to\n", + "[nltk_data] C:\\Users\\kukre\\AppData\\Roaming\\nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import nltk\n", + "nltk.download('wordnet')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_stemmedtext_lemmatized
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about@161252 what' that egg websit peopl talk about@161252 what's that egg website people talk about
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so...@693975 we can assist you. we recommend updating to io 11.1.1 if you haven't had the chance to d...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 that better than have an unstabl connect that drop everi 5-20 min@331912 @115955 thats better than having an unstable connection that drop every 5-20 min
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.@virginamerica is probabl one of the best airlin i'v ever experienced.@virginamerica is probably one of the best airline i've ever experienced.
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_stemmed \\\n", + "0 @161252 what' that egg websit peopl talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so... \n", + "3 @331912 @115955 that better than have an unstabl connect that drop everi 5-20 min \n", + "4 @virginamerica is probabl one of the best airlin i'v ever experienced. \n", + "\n", + " text_lemmatized \n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to io 11.1.1 if you haven't had the chance to d... \n", + "3 @331912 @115955 thats better than having an unstable connection that drop every 5-20 min \n", + "4 @virginamerica is probably one of the best airline i've ever experienced. 
" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from nltk.stem import WordNetLemmatizer\n", + "\n", + "\n", + "lemmatizer = WordNetLemmatizer()\n", + "def lemmatize_words(text):\n", + " return \" \".join([lemmatizer.lemmatize(word) for word in text.split()])\n", + "\n", + "df['text_lemmatized'] = df.text_lower.apply(lemmatize_words)\n", + "df.head()[['text','text_lower','text_stemmed', 'text_lemmatized']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Redo the lemmatization process with POS tag for our dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'having'" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lemmatizer.lemmatize('having')" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'have'" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lemmatizer.lemmatize('having', 'v') " + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "from nltk.corpus import wordnet\n", + "wordnet_map = {\"N\":wordnet.NOUN, \"V\":wordnet.VERB, \"J\":wordnet.ADJ, \"R\":wordnet.ADV}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package averaged_perceptron_tagger to\n", + "[nltk_data] C:\\Users\\kukre\\AppData\\Roaming\\nltk_data...\n", + "[nltk_data] Package averaged_perceptron_tagger is already up-to-\n", + "[nltk_data] date!\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import 
nltk\n", + "nltk.download('averaged_perceptron_tagger')" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_wo_puncttext_wo_stoptext_wo_stopfreqtext_wo_stopfreqraretext_stemmedtext_lemmatized
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about161252 whats that egg website people talk about161252 whats egg website people talk161252 whats egg website people talk161252 whats egg website people talk@161252 what' that egg websit peopl talk about@161252 What's that egg website people talk about
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqWhy!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so...693975 assist recommend updating ios 1111 havent chance also dm us following link futher support...693975 assist recommend updating ios 1111 havent chance also following link futher support https...693975 assist recommend updating ios 1111 havent chance also following link futher support https...@693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so...@693975 We can assist you. We recommend update to iOS 11.1.1 if you haven't have the chance to d...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins331912 115955 thats better than having an unstable connection that drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins@331912 @115955 that better than have an unstabl connect that drop everi 5-20 min@331912 @115955 Thats good than have an unstable connection that drop every 5-20 min
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.virginamerica is probably one of the best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experienced@virginamerica is probabl one of the best airlin i'v ever experienced.@VirginAmerica be probably one of the best airline I've ever experienced.
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_wo_punct \\\n", + "0 161252 whats that egg website people talk about \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so... \n", + "3 331912 115955 thats better than having an unstable connection that drops every 5 20 mins \n", + "4 virginamerica is probably one of the best airlines ive ever experienced \n", + "\n", + " text_wo_stop \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also dm us following link futher support... \n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_wo_stopfreq \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also following link futher support https... 
\n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_wo_stopfreqrare \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also following link futher support https... \n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_stemmed \\\n", + "0 @161252 what' that egg websit peopl talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so... \n", + "3 @331912 @115955 that better than have an unstabl connect that drop everi 5-20 min \n", + "4 @virginamerica is probabl one of the best airlin i'v ever experienced. \n", + "\n", + " text_lemmatized \n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend update to iOS 11.1.1 if you haven't have the chance to d... \n", + "3 @331912 @115955 Thats good than have an unstable connection that drop every 5-20 min \n", + "4 @VirginAmerica be probably one of the best airline I've ever experienced. 
" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from nltk.corpus import wordnet\n", + "\n", + "lemmatizer = WordNetLemmatizer()\n", + "wordnet_map = {\"N\":wordnet.NOUN, \"V\":wordnet.VERB, \"J\":wordnet.ADJ, \"R\":wordnet.ADV}\n", + "\n", + "def lemmatize_words(text):\n", + " pos_tagged_text = nltk.pos_tag(text.split())\n", + " return \" \".join([lemmatizer.lemmatize(word, wordnet_map.get(pos[0], wordnet.NOUN)) for word, pos in pos_tagged_text])\n", + "\n", + "df[\"text_lemmatized\"] = df[\"text\"].apply(lambda text: lemmatize_words(text))\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Removal of Emojis\n", + "\n", + "With more and more usage of social media platforms, there is an explosion in the usage of emojis in our day to day life as well. Probably we might need to remove these emojis for some of our textual analysis.\n", + "\n", + "Thanks to [this code](https://stackoverflow.com/a/58356570/8210613), please find below a helper function to remove emojis from our text." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_wo_puncttext_wo_stoptext_wo_stopfreqtext_wo_stopfreqraretext_stemmedtext_lemmatizedtext_no_emojitext_no_url
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about161252 whats that egg website people talk about161252 whats egg website people talk161252 whats egg website people talk161252 whats egg website people talk@161252 what' that egg websit peopl talk about@161252 What's that egg website people talk about@161252 What's that egg website people talk about@161252 What's that egg website people talk about
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqWhy!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqWhy! #iOS11 @AppleSupport https://t.co/BXrVfeIXxqWhy!🤷🏻‍♀️ #iOS11 @AppleSupport
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so...693975 assist recommend updating ios 1111 havent chance also dm us following link futher support...693975 assist recommend updating ios 1111 havent chance also following link futher support https...693975 assist recommend updating ios 1111 havent chance also following link futher support https...@693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so...@693975 We can assist you. We recommend update to iOS 11.1.1 if you haven't have the chance to d...@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins331912 115955 thats better than having an unstable connection that drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins@331912 @115955 that better than have an unstabl connect that drop everi 5-20 min@331912 @115955 Thats good than have an unstable connection that drop every 5-20 min@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.virginamerica is probably one of the best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experienced@virginamerica is probabl one of the best airlin i'v ever experienced.@VirginAmerica be probably one of the best airline I've ever experienced.@VirginAmerica is probably one of the best airlines I've ever experienced.@VirginAmerica is probably one of the best airlines I've ever experienced.
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_wo_punct \\\n", + "0 161252 whats that egg website people talk about \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so... \n", + "3 331912 115955 thats better than having an unstable connection that drops every 5 20 mins \n", + "4 virginamerica is probably one of the best airlines ive ever experienced \n", + "\n", + " text_wo_stop \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also dm us following link futher support... \n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_wo_stopfreq \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also following link futher support https... 
\n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_wo_stopfreqrare \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also following link futher support https... \n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_stemmed \\\n", + "0 @161252 what' that egg websit peopl talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so... \n", + "3 @331912 @115955 that better than have an unstabl connect that drop everi 5-20 min \n", + "4 @virginamerica is probabl one of the best airlin i'v ever experienced. \n", + "\n", + " text_lemmatized \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend update to iOS 11.1.1 if you haven't have the chance to d... \n", + "3 @331912 @115955 Thats good than have an unstable connection that drop every 5-20 min \n", + "4 @VirginAmerica be probably one of the best airline I've ever experienced. \n", + "\n", + " text_no_emoji \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why! #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. 
\n", + "\n", + " text_no_url \n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#https://stackoverflow.com/a/58356570/8210613\n", + "def remove_emoji(data):\n", + " emoj = re.compile(\"[\"\n", + " u\"\\U0001F600-\\U0001F64F\" # emoticons\n", + " u\"\\U0001F300-\\U0001F5FF\" # symbols & pictographs\n", + " u\"\\U0001F680-\\U0001F6FF\" # transport & map symbols\n", + " u\"\\U0001F1E0-\\U0001F1FF\" # flags (iOS)\n", + " u\"\\U00002500-\\U00002BEF\" # chinese char\n", + " u\"\\U00002702-\\U000027B0\"\n", + " u\"\\U00002702-\\U000027B0\"\n", + " u\"\\U000024C2-\\U0001F251\"\n", + " u\"\\U0001f926-\\U0001f937\"\n", + " u\"\\U00010000-\\U0010ffff\"\n", + " u\"\\u2640-\\u2642\" \n", + " u\"\\u2600-\\u2B55\"\n", + " u\"\\u200d\"\n", + " u\"\\u23cf\"\n", + " u\"\\u23e9\"\n", + " u\"\\u231a\"\n", + " u\"\\ufe0f\" # dingbats\n", + " u\"\\u3030\"\n", + " \"]+\", re.UNICODE)\n", + " return re.sub(emoj, '', data)\n", + "\n", + "df[\"text_no_emoji\"] = df[\"text\"].apply(remove_emoji)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Removal of URLs\n", + "\n", + "Next preprocessing step is to remove any URLs present in the data. For example, if we are doing a twitter analysis, then there is a good chance that the tweet will have some URL in it. 
We will probably need to remove them before further analysis.\n", + "\n", + "We can use the code snippet below to do that." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
texttext_lowertext_wo_puncttext_wo_stoptext_wo_stopfreqtext_wo_stopfreqraretext_stemmedtext_lemmatizedtext_no_emojitext_no_url
0@161252 What's that egg website people talk about@161252 what's that egg website people talk about161252 whats that egg website people talk about161252 whats egg website people talk161252 whats egg website people talk161252 whats egg website people talk@161252 what' that egg websit peopl talk about@161252 What's that egg website people talk about@161252 What's that egg website people talk about@161252 What's that egg website people talk about
1Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxqwhy!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxqWhy!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxqWhy! #iOS11 @AppleSupport https://t.co/BXrVfeIXxqWhy!🤷🏻‍♀️ #iOS11 @AppleSupport
2@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ...693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so...693975 assist recommend updating ios 1111 havent chance also dm us following link futher support...693975 assist recommend updating ios 1111 havent chance also following link futher support https...693975 assist recommend updating ios 1111 havent chance also following link futher support https...@693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so...@693975 We can assist you. We recommend update to iOS 11.1.1 if you haven't have the chance to d...@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...@693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ...
3@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 thats better than having an unstable connection that drops every 5-20 mins331912 115955 thats better than having an unstable connection that drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins331912 115955 thats better unstable connection drops every 5 20 mins@331912 @115955 that better than have an unstabl connect that drop everi 5-20 min@331912 @115955 Thats good than have an unstable connection that drop every 5-20 min@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins@331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins
4@VirginAmerica is probably one of the best airlines I've ever experienced.@virginamerica is probably one of the best airlines i've ever experienced.virginamerica is probably one of the best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experiencedvirginamerica probably one best airlines ive ever experienced@virginamerica is probabl one of the best airlin i'v ever experienced.@VirginAmerica be probably one of the best airline I've ever experienced.@VirginAmerica is probably one of the best airlines I've ever experienced.@VirginAmerica is probably one of the best airlines I've ever experienced.
\n", + "
" + ], + "text/plain": [ + " text \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. \n", + "\n", + " text_lower \\\n", + "0 @161252 what's that egg website people talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updating to ios 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @virginamerica is probably one of the best airlines i've ever experienced. \n", + "\n", + " text_wo_punct \\\n", + "0 161252 whats that egg website people talk about \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 we can assist you we recommend updating to ios 1111 if you havent had the chance to do so... \n", + "3 331912 115955 thats better than having an unstable connection that drops every 5 20 mins \n", + "4 virginamerica is probably one of the best airlines ive ever experienced \n", + "\n", + " text_wo_stop \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also dm us following link futher support... \n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_wo_stopfreq \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also following link futher support https... 
\n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_wo_stopfreqrare \\\n", + "0 161252 whats egg website people talk \n", + "1 why🤷🏻‍♀️ ios11 applesupport httpstcobxrvfeixxq \n", + "2 693975 assist recommend updating ios 1111 havent chance also following link futher support https... \n", + "3 331912 115955 thats better unstable connection drops every 5 20 mins \n", + "4 virginamerica probably one best airlines ive ever experienced \n", + "\n", + " text_stemmed \\\n", + "0 @161252 what' that egg websit peopl talk about \n", + "1 why!🤷🏻‍♀️ #ios11 @applesupport https://t.co/bxrvfeixxq \n", + "2 @693975 we can assist you. we recommend updat to io 11.1.1 if you haven't had the chanc to do so... \n", + "3 @331912 @115955 that better than have an unstabl connect that drop everi 5-20 min \n", + "4 @virginamerica is probabl one of the best airlin i'v ever experienced. \n", + "\n", + " text_lemmatized \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend update to iOS 11.1.1 if you haven't have the chance to d... \n", + "3 @331912 @115955 Thats good than have an unstable connection that drop every 5-20 min \n", + "4 @VirginAmerica be probably one of the best airline I've ever experienced. \n", + "\n", + " text_no_emoji \\\n", + "0 @161252 What's that egg website people talk about \n", + "1 Why! #iOS11 @AppleSupport https://t.co/BXrVfeIXxq \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. 
\n", + "\n", + " text_no_url \n", + "0 @161252 What's that egg website people talk about \n", + "1 Why!🤷🏻‍♀️ #iOS11 @AppleSupport \n", + "2 @693975 We can assist you. We recommend updating to iOS 11.1.1 if you haven't had the chance to ... \n", + "3 @331912 @115955 Thats better than having an unstable connection that drops every 5-20 mins \n", + "4 @VirginAmerica is probably one of the best airlines I've ever experienced. " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def remove_urls(text):\n", + " url_pattern = re.compile(r'https?://\\S+|www\\.\\S+')\n", + " return url_pattern.sub(r'', text)\n", + "\n", + "\n", + "df[\"text_no_url\"] = df[\"text\"].apply(remove_urls)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Discussion activity:\n", + "\n", + "* What usecases can you think for NLP?\n", + " - analysis of speech - news article, transcription of speeches -- topic modelling vs topic classification \n", + "\n", + "* What role does preprocessing play in the application of NLP?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "toc-autonumbering": true + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/IPL Dataset analysis using more loops and conditionals/notebook/IPL_data_analysis-MK.ipynb b/IPL Dataset analysis using more loops and conditionals/notebook/IPL_data_analysis-MK.ipynb new file mode 100644 index 0000000..dbcccd9 --- /dev/null +++ b/IPL Dataset analysis using more loops and conditionals/notebook/IPL_data_analysis-MK.ipynb @@ -0,0 +1,1549 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# IPL Dataset Analysis\n", + "\n", + "## Problem Statement\n", + "We want to understand what happens during an IPL match, which raises several questions given our limited knowledge of cricket, the game on which it is based. This analysis aims to find out which factors led one of the teams to win and how much they matter.\n", + "\n", + "## About the Dataset :\n", + "The Indian Premier League (IPL) is a professional T20 cricket league in India contested during April-May of every year by teams representing Indian cities. It is the most-attended cricket league in the world and ranks sixth among all the sports leagues. It has teams with players from around the world and is very competitive and entertaining with a lot of close matches between teams.\n", + "\n", + "The IPL and other cricket related datasets are available at [cricsheet.org](https://cricsheet.org). 
Feel free to visit the website and explore the data by yourself as exploring new sources of data is one of the interesting activities a data scientist gets to do.\n", + "\n", + "## About the dataset:\n", + "Snapshot of the data you will be working on:
\n", + "
\n", + "The dataset has 136522 data points and 24 features
\n", + "\n", + "|Features|Description|\n", + "|-----|-----|\n", + "|match_code|Code pertaining to individual match|\n", + "|date|Date of the match played|\n", + "|city|City where the match was played|\n", + "|venue|Stadium in that city where the match was played|\n", + "|team1|team1|\n", + "|team2|team2|\n", + "|toss_winner|Who won the toss out of two teams|\n", + "|toss_decision|toss decision taken by toss winner|\n", + "|winner|Winner of that match between two teams|\n", + "|win_type|How the team won (by wickets or runs etc.)|\n", + "|win_margin|margin by which the team won| \n", + "|inning|inning type (1st or 2nd)|\n", + "|delivery|ball delivery|\n", + "|batting_team|team currently batting|\n", + "|batsman|current batsman on strike|\n", + "|non_striker|batsman on non-strike|\n", + "|bowler|Current bowler|\n", + "|runs|runs scored|\n", + "|extras|extra runs scored|\n", + "|total|total runs scored on that delivery, including runs and extras|\n", + "|extras_type|extra runs scored by wides, no ball or leg bye|\n", + "|player_out|player that got out|\n", + "|wicket_kind|How the player got out|\n", + "|wicket_fielders|Fielder who caught the player out|\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the data" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(136522, 24)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Read the data using pandas module.\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "df_ipl = pd.read_csv('../data/ipl_data.csv')\n", + "df_ipl.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "match_code 392203\n", + "date 2009-05-01\n", + "city East London\n", + "venue Buffalo Park\n", + "team1 Kolkata Knight Riders\n", + "team2 Mumbai 
Indians\n", + "toss_winner Mumbai Indians\n", + "toss_decision bat\n", + "winner Mumbai Indians\n", + "win_type runs\n", + "win_margin 9\n", + "inning 1\n", + "delivery 0.1\n", + "batting_team Mumbai Indians\n", + "batsman ST Jayasuriya\n", + "non_striker SR Tendulkar\n", + "bowler I Sharma\n", + "runs 0\n", + "extras 1\n", + "total 1\n", + "extras_type wides\n", + "player_out NaN\n", + "wicket_kind NaN\n", + "wicket_fielders NaN\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ipl.iloc[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## There are matches being played all around the world. Find the list of unique cities where matches are being played throughout the world." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cities these matches were played at ['East London' 'Port Elizabeth' 'Centurion' 'neutral_venue' 'Chennai'\n", + " 'Jaipur' 'Kolkata' 'Delhi' 'Chandigarh' 'Hyderabad' 'Ranchi' 'Mumbai'\n", + " 'Bangalore' 'Dharamsala' 'Pune' 'Rajkot' 'Durban' 'Cuttack' 'Cape Town'\n", + " 'Ahmedabad' 'Johannesburg' 'Visakhapatnam' 'Abu Dhabi' 'Raipur' 'Kochi'\n", + " 'Kimberley' 'Nagpur' 'Bloemfontein' 'Indore' 'Kanpur']\n" + ] + } + ], + "source": [ + "print('Cities these matches were played at',\n", + " df_ipl.city.unique())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Find columns containing null values if any." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 136522 entries, 0 to 136521\n", + "Data columns (total 24 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 match_code 136522 non-null int64 \n", + " 1 date 136522 non-null object \n", + " 2 city 136522 non-null object \n", + " 3 venue 136522 non-null object \n", + " 4 team1 136522 non-null object \n", + " 5 team2 136522 non-null object \n", + " 6 toss_winner 136522 non-null object \n", + " 7 toss_decision 136522 non-null object \n", + " 8 winner 134704 non-null object \n", + " 9 win_type 134704 non-null object \n", + " 10 win_margin 134704 non-null float64\n", + " 11 inning 136522 non-null int64 \n", + " 12 delivery 136522 non-null float64\n", + " 13 batting_team 136522 non-null object \n", + " 14 batsman 136522 non-null object \n", + " 15 non_striker 136522 non-null object \n", + " 16 bowler 136522 non-null object \n", + " 17 runs 136522 non-null int64 \n", + " 18 extras 136522 non-null int64 \n", + " 19 total 136522 non-null int64 \n", + " 20 extras_type 7458 non-null object \n", + " 21 player_out 6715 non-null object \n", + " 22 wicket_kind 6715 non-null object \n", + " 23 wicket_fielders 4865 non-null object \n", + "dtypes: float64(2), int64(5), object(17)\n", + "memory usage: 25.0+ MB\n" + ] + } + ], + "source": [ + "df_ipl.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Columns with null values are ['winner', 'win_type', 'win_margin', 'extras_type', 'player_out', 'wicket_kind', 'wicket_fielders']\n" + ] + } + ], + "source": [ + "nulls = df_ipl.isnull().sum()\n", + "print(\"Columns with null values are\",\n", + " nulls[nulls>0].index.to_list())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "## Matches are played throughout the world in different countries but they may or may not have multiple venues(stadiums where matches are played). Find the top 5 venues where the most matches are played.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Chandigarh': array(['Punjab Cricket Association Stadium, Mohali',\n", + " 'Punjab Cricket Association IS Bindra Stadium, Mohali'],\n", + " dtype=object),\n", + " 'Mumbai': array(['Wankhede Stadium', 'Brabourne Stadium',\n", + " 'Dr DY Patil Sports Academy'], dtype=object),\n", + " 'Pune': array(['Maharashtra Cricket Association Stadium',\n", + " 'Subrata Roy Sahara Stadium'], dtype=object),\n", + " 'neutral_venue': array(['Dubai International Cricket Stadium', 'Sharjah Cricket Stadium'],\n", + " dtype=object)}" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city_venue = df_ipl.groupby('city').venue.nunique()\n", + "multi_stadium_cities = city_venue[city_venue>1].index.to_list()\n", + "\n", + "df_ipl.loc[df_ipl.city.isin(multi_stadium_cities),].groupby('city').venue.unique().to_dict()\n", + "#.value_counts().index.to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "city\n", + "Mumbai 77\n", + "Bangalore 58\n", + "Kolkata 54\n", + "Delhi 53\n", + "Chennai 48\n", + "Name: match_code, dtype: int64" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ipl.groupby(['city']).match_code.nunique().nlargest(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "city venue \n", + "Bangalore M Chinnaswamy Stadium 58\n", + "Kolkata Eden Gardens 54\n", + "Delhi Feroz Shah Kotla 53\n", + "Mumbai Wankhede Stadium 49\n", + "Chennai MA Chidambaram 
Stadium, Chepauk 48\n", + "Name: match_code, dtype: int64" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ipl.groupby(['city','venue']).match_code.nunique().nlargest(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Find out how the runs were scored that is the runs count frequency table( number of singles, doubles, boundaries, sixes etc were scored)." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAArkAAAHSCAYAAADohdOwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATGUlEQVR4nO3dX4yld13H8c/XnSIWcREpxmzBhUiMaBV00mBqjFajhTXihReQqGhI9kYNJiZmiYmJd+uN/+KfpEEUI4qKEgn1XwMSQoLFKRTa0lYrrqEtuhJ1LTYBu369mNM4LNvO2dnz7Ha+vF7Jycx55pnffre/NPveZ549p7o7AAAwyRdd7QEAAGDTRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMM7WEos+73nP6+PHjy+xNAAAJEnuvPPOT3X3dRf72iKRe/z48ezs7CyxNAAAJEmq6p+f7GtuVwAAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADG2Vpi0bsfPpfjp25bYulD5czpE1d7BACAL0iu5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDhrRW5VPaeq3l5V91fVfVX1rUsPBgAAB7W15nm/kuQvu/sHq+oZSa5dcCYAALgs+0ZuVX1Zkm9P8qNJ0t2fTfLZZccCAICDW+d2hRcn+bckv11VH66qN1XVsxaeCwAADmydyN1K8s1JfrO7X57kv5OcuvCkqjpZVTtVtXP+sXMbHhMAANa3TuQ+lOSh7r5j9fzt2Y3ez9Hdt3b3dndvH7n26CZnBACAS7Jv5Hb3vyT5RFV97erQdyX52KJTAQDAZVj31RV+MslbV6+s8PEkP7bcSAAAcHnWitzuvivJ9sKzAADARnjHMwAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGGdriUVvOHY0O6dPLLE0AADsy5VcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcUQuAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiF
wCAcUQuAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxtla56SqOpPk0STnkzze3dtLDgUAAJdjrchd+c7u/tRikwAAwIa4XQEAgHHWjdxO8tdVdWdVnVxyIAAAuFzr3q5wU3c/UlXPT3J7Vd3f3e/be8Iqfk8myQtf+MINjwkAAOtb60pudz+y+ng2yTuS3HiRc27t7u3u3r7uuus2OyUAAFyCfSO3qp5VVc9+4vMk35PknqUHAwCAg1rndoWvTPKOqnri/N/v7r9cdCoAALgM+0Zud388yTddgVkAAGAjvIQYAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcUQuAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADG2Vpi0bsfPpfjp25bYmlY1JnTJ672CADABriSCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA46wduVV1pKo+XFXvWnIgAAC4XJdyJfcNSe5bahAAANiUtSK3qq5PciLJm5YdBwAALt+6V3J/OcnPJPnfJzuhqk5W1U5V7Zx/7NxGhgMAgIPYN3Kr6vuSnO3uO5/qvO6+tbu3u3v7yLVHNzYgAABcqnWu5N6U5Pur6kyStyW5uap+b9GpAADgMuwbud39xu6+vruPJ3lNkvd09w8tPhkAAByQ18kFAGCcrUs5ubvfm+S9i0wCAAAb4kouAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxrmkt/Vd1w3Hjmbn9IkllgYAgH25kgsAwDgiFwCAcUQuAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcUQuAADjiFwAAMYRuQAAjCNyAQAYZ2uJRe9++FyOn7rtc46dOX1iiV8KAAA+jyu5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwzr6RW1XPrKoPVtVHqureqvr5KzEYAAAc1NYa53wmyc3d/emquibJ+6vqL7r7bxeeDQAADmTfyO3uTvLp1dNrVo9ecigAALgca92TW1VHququJGeT3N7dd1zknJNVtVNVO+cfO7fpOQEAYG1rRW53n+/ulyW5PsmNVfUNFznn1u7e7u7tI9ce3fScAACwtkt6dYXu/s8k701yyyLTAADABqzz6grXVdVzVp9/SZLvTnL/0oMBAMBBrfPqCl+V5C1VdSS7UfxH3f2uZccCAICDW+fVFT6a5OVXYBYAANgI73gGAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcfZ9W9+DuOHY0eycPrHE0gAAsC9XcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcUQuAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcUQuAADjbC2x6N0Pn8vxU7ctsTQHcOb0ias9A
gDAFeVKLgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcUQuAADjiFwAAMYRuQAAjLNv5FbVC6rqb6rqvqq6t6recCUGAwCAg9pa45zHk/x0d3+oqp6d5M6qur27P7bwbAAAcCD7Xsnt7k9294dWnz+a5L4kx5YeDAAADuqS7smtquNJXp7kjot87WRV7VTVzvnHzm1mOgAAOIC1I7eqvjTJnyT5qe7+rwu/3t23dvd2d28fufboJmcEAIBLslbkVtU12Q3ct3b3ny47EgAAXJ51Xl2hkvxWkvu6+xeXHwkAAC7POldyb0ryw0lurqq7Vo9XLTwXAAAc2L4vIdbd709SV2AWAADYCO94BgDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHH2fVvfg7jh2NHsnD6xxNIAALAvV3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcUQuAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA42wtsejdD5/L8VO3LbE0AMBVd+b0ias9AvtwJRcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcUQuAADjiFwAAMbZN3Kr6s1Vdbaq7rkSAwEAwOVa50ru7yS5ZeE5AABgY/aN3O5+X5J/vwKzAADARrgnFwCAcTYWuVV1sqp2qmrn/GPnNrUsAABcso1Fbnff2t3b3b195Nqjm1oWAAAumdsVAAAYZ52XEPuDJB9I8rVV9VBVvX75sQAA4OC29juhu197JQYBAIBNcbsCAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxtn3bX0P4oZjR7Nz+sQSSwMAwL5cyQUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDgiFwCAcUQuAADjiFwAAMYRuQAAjCNyAQAYR+QCADCOyAUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMs7XEonc/fC7HT922xNIAADxNnDl94mqP8KRcyQUAYByRCwDAOCIXAIBxRC4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHHWityquqWqHqiqB6vq1NJDAQDA5dg3cqvqSJJfT/LKJC9N8tqqeunSgwEAwEGtcyX3xiQPdvfHu/uzSd6W5NXLjgUAAAe3TuQeS/KJPc8fWh0DAICnpXUity5yrD/vpKqTVbVTVTvnHzt3+ZMBAMABrRO5DyV5wZ7n1yd55MKTuvvW7t7u7u0j1x7d1HwAAHDJ1oncv0vykqp6UVU9I8lrkrxz2bEAAODgtvY7obsfr6qfSPJXSY4keXN337v4ZAAAcED7Rm6SdPefJ/nzhWcBAICN8I5nAACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGGett/W9VDccO5qd0yeWWBoAAPblSi4AAOOIXAAAxhG5AACMI3IBABhH5AIAMI7IBQBgHJELAMA4IhcAgHFELgAA44hcAADGEbkAAIwjcgEAGEfkAgAwjsgFAGAckQsAwDjV3ZtftOrRJA9sfGGutOcl+dTVHoLLZh9nsI8z2McZ7OPTx1d393UX+8LWQr/gA929vdDaXCFVtWMfDz/7OIN9nME+zmAfDwe3KwAAMI7IBQBgnKUi99aF1uXKso8z2McZ7OMM9nEG+3gILPIPzwAA4
GpyuwIAAONsNHKr6paqeqCqHqyqU5tcm4OpqjdX1dmqumfPsedW1e1V9Q+rj1++52tvXO3fA1X1vXuOf0tV3b362q9WVa2Of3FV/eHq+B1VdfxK/v6+UFTVC6rqb6rqvqq6t6resDpuLw+RqnpmVX2wqj6y2sefXx23j4dQVR2pqg9X1btWz+3jIVNVZ1b//e+qqp3VMfs4xMYit6qOJPn1JK9M8tIkr62ql25qfQ7sd5LccsGxU0ne3d0vSfLu1fOs9us1Sb5+9T2/sdrXJPnNJCeTvGT1eGLN1yf5j+7+miS/lOQXFvudfGF7PMlPd/fXJXlFkh9f7Ze9PFw+k+Tm7v6mJC9LcktVvSL28bB6Q5L79jy3j4fTd3b3y/a8JJh9HGKTV3JvTPJgd3+8uz+b5G1JXr3B9TmA7n5fkn+/4PCrk7xl9flbkvzAnuNv6+7PdPc/JXkwyY1V9VVJvqy7P9C7N3H/7gXf88Rab0/yXU/8DZbN6e5PdveHVp8/mt0/WI/FXh4qvevTq6fXrB4d+3joVNX1SU4kedOew/ZxBvs4xCYj91iST+x5/tDqGE8/X9ndn0x24ynJ81fHn2wPj60+v/D453xPdz+e5FySr1hscrL6cdfLk9wRe3norH7EfVeSs0lu7277eDj9cpKfSfK/e47Zx8Onk/x1Vd1ZVSdXx+zjEJt8x7OL/c3ESzccLk+2h0+1t/b9CqqqL03yJ0l+qrv/6ykuCNjLp6nuPp/kZVX1nCTvqKpveIrT7ePTUFV9X5Kz3X1nVX3HOt9ykWP28enhpu5+pKqen+T2qrr/Kc61j4fMJq/kPpTkBXueX5/kkQ2uz+b86+rHK1l9PLs6/mR7+NDq8wuPf873VNVWkqP5/Nsj2ICquia7gfvW7v7T1WF7eUh1938meW92792zj4fLTUm+v6rOZPfWvJur6vdiHw+d7n5k9fFskndk99ZL+zjEJiP375K8pKpeVFXPyO7N2e/c4PpszjuTvG71+euS/Nme469Z/WvQF2X35vkPrn5c82hVvWJ1L9GPXPA9T6z1g0ne0158eeNW/91/K8l93f2Le75kLw+RqrpudQU3VfUlSb47yf2xj4dKd7+xu6/v7uPZ/bPuPd39Q7GPh0pVPauqnv3E50m+J8k9sY9zdPfGHkleleTvk/xjkp/d5NoeB96TP0jyyST/k92/Ub4+u/cDvTvJP6w+PnfP+T+72r8Hkrxyz/Ht7P7P/49Jfi3//0Yiz0zyx9m9Af+DSV58tX/PEx9Jvi27P+L6aJK7Vo9X2cvD9UjyjUk+vNrHe5L83Oq4fTykjyTfkeRd9vHwPZK8OMlHVo97n+gW+zjn4R3PAAAYxzueAQAwjsgFAGAckQsAwDgiFwCAcUQuAADjiFwAAMYRuQAAjCNyAQAY5/8AC2dAaR1IUEkAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "(df_ipl\n", + " runs.value_counts(sort=False)\n", + " .plot.barh(figsize=(12,8))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://stackoverflow.com/a/30874820/8210613" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "(df_ipl\n", + " .groupby(['inning'])\n", + " .runs.value_counts(sort=False)\n", + " .unstack().T\n", + " .plot.barh(figsize=(12,8))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## IPL seasons are held every year now let's look at our data and extract how many seasons and which year were they played?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2009\n", + "1 2009\n", + "2 2009\n", + "3 2009\n", + "4 2009\n", + " ... \n", + "136517 2008\n", + "136518 2008\n", + "136519 2008\n", + "136520 2008\n", + "136521 2008\n", + "Name: date, Length: 136522, dtype: int64" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ipl.date.astype('datetime64[ns]').dt.year" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2009\n", + "1 2009\n", + "2 2009\n", + "3 2009\n", + "4 2009\n", + " ... 
\n", + "136517 2008\n", + "136518 2008\n", + "136519 2008\n", + "136520 2008\n", + "136521 2008\n", + "Name: year, Length: 136522, dtype: object" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ipl['year'] = df_ipl.date.apply(lambda row: row[:4])\n", + "df_ipl['year']" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ipl.year.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015',\n", + " '2016'], dtype=object)" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ipl.year.sort_values().unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Find out the total number of matches played in each season, and also the total number of runs scored in each season.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df_ipl.groupby('year').match_code.nunique().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df_ipl.groupby('year').total.sum().plot.barh()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## There are teams which are high performing and low performing. Let's look at the aspect of performance of an individual team. Filter the data and aggregate the runs scored by each team. Display top 10 results which are having runs scored over 200." + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearmatch_codeinningteam1team2toss_winnertoss_decisiontotal
70120135980271Royal Challengers BangalorePune WarriorsPune Warriorsfield263
111820169809871Royal Challengers BangaloreGujarat LionsGujarat Lionsfield248
29220104191371Chennai Super KingsRajasthan RoyalsChennai Super Kingsbat246
220083359831Kings XI PunjabChennai Super KingsChennai Super Kingsbat240
100220158297951Mumbai IndiansRoyal Challengers BangaloreRoyal Challengers Bangalorebat235
47220115012601Kings XI PunjabRoyal Challengers BangaloreKings XI Punjabbat232
39820115012231Delhi DaredevilsKings XI PunjabKings XI Punjabfield231
85120147339871Kings XI PunjabChennai Super KingsChennai Super Kingsfield231
103820169809071Royal Challengers BangaloreSunrisers HyderabadSunrisers Hyderabadfield227
91120147340471Chennai Super KingsKings XI PunjabChennai Super Kingsfield226
\n", + "
" + ], + "text/plain": [ + " year match_code inning team1 \\\n", + "701 2013 598027 1 Royal Challengers Bangalore \n", + "1118 2016 980987 1 Royal Challengers Bangalore \n", + "292 2010 419137 1 Chennai Super Kings \n", + "2 2008 335983 1 Kings XI Punjab \n", + "1002 2015 829795 1 Mumbai Indians \n", + "472 2011 501260 1 Kings XI Punjab \n", + "398 2011 501223 1 Delhi Daredevils \n", + "851 2014 733987 1 Kings XI Punjab \n", + "1038 2016 980907 1 Royal Challengers Bangalore \n", + "911 2014 734047 1 Chennai Super Kings \n", + "\n", + " team2 toss_winner toss_decision \\\n", + "701 Pune Warriors Pune Warriors field \n", + "1118 Gujarat Lions Gujarat Lions field \n", + "292 Rajasthan Royals Chennai Super Kings bat \n", + "2 Chennai Super Kings Chennai Super Kings bat \n", + "1002 Royal Challengers Bangalore Royal Challengers Bangalore bat \n", + "472 Royal Challengers Bangalore Kings XI Punjab bat \n", + "398 Kings XI Punjab Kings XI Punjab field \n", + "851 Chennai Super Kings Chennai Super Kings field \n", + "1038 Sunrisers Hyderabad Sunrisers Hyderabad field \n", + "911 Kings XI Punjab Chennai Super Kings field \n", + "\n", + " total \n", + "701 263 \n", + "1118 248 \n", + "292 246 \n", + "2 240 \n", + "1002 235 \n", + "472 232 \n", + "398 231 \n", + "851 231 \n", + "1038 227 \n", + "911 226 " + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_scores = df_ipl.groupby(['year','match_code','inning','team1','team2','toss_winner','toss_decision']).total.sum().reset_index()\n", + "high_scores = total_scores[total_scores.total>200]\n", + "high_scores.nlargest(10,columns='total')" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
match_codeinningteam1team2total
7015980271Royal Challengers BangalorePune Warriors263
11189809871Royal Challengers BangaloreGujarat Lions248
2924191371Chennai Super KingsRajasthan Royals246
23359831Kings XI PunjabChennai Super Kings240
10028297951Mumbai IndiansRoyal Challengers Bangalore235
4725012601Kings XI PunjabRoyal Challengers Bangalore232
3985012231Delhi DaredevilsKings XI Punjab231
8517339871Kings XI PunjabChennai Super Kings231
10389809071Royal Challengers BangaloreSunrisers Hyderabad227
9117340471Chennai Super KingsKings XI Punjab226
\n", + "
" + ], + "text/plain": [ + " match_code inning team1 \\\n", + "701 598027 1 Royal Challengers Bangalore \n", + "1118 980987 1 Royal Challengers Bangalore \n", + "292 419137 1 Chennai Super Kings \n", + "2 335983 1 Kings XI Punjab \n", + "1002 829795 1 Mumbai Indians \n", + "472 501260 1 Kings XI Punjab \n", + "398 501223 1 Delhi Daredevils \n", + "851 733987 1 Kings XI Punjab \n", + "1038 980907 1 Royal Challengers Bangalore \n", + "911 734047 1 Chennai Super Kings \n", + "\n", + " team2 total \n", + "701 Pune Warriors 263 \n", + "1118 Gujarat Lions 248 \n", + "292 Rajasthan Royals 246 \n", + "2 Chennai Super Kings 240 \n", + "1002 Royal Challengers Bangalore 235 \n", + "472 Royal Challengers Bangalore 232 \n", + "398 Kings XI Punjab 231 \n", + "851 Chennai Super Kings 231 \n", + "1038 Sunrisers Hyderabad 227 \n", + "911 Kings XI Punjab 226 " + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_scores = df_ipl.groupby(['match_code','inning','team1','team2']).total.sum().reset_index()\n", + "high_scores = total_scores[total_scores.total>=200]\n", + "high_scores.nlargest(10,columns='total')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Chasing a 200+ target is difficulty in T-20 format. What are the chances that a team scoring runs above 200 in their 1st inning is chased by the opposition in 2nd inning.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
team1team2total_inn1total_inn2
match_code
335983Kings XI PunjabChennai Super Kings240207
335989Chennai Super KingsMumbai Indians208202
335990Deccan ChargersRajasthan Royals214217
336033Chennai Super KingsRajasthan Royals211201
419107Mumbai IndiansRajasthan Royals212208
419112Royal Challengers BangaloreKings XI Punjab203204
419137Chennai Super KingsRajasthan Royals246223
419139Kolkata Knight RidersKings XI Punjab200204
501223Delhi DaredevilsKings XI Punjab231202
548318Chennai Super KingsRoyal Challengers Bangalore205208
729283Chennai Super KingsKings XI Punjab205206
734007Sunrisers HyderabadKings XI Punjab205211
734047Chennai Super KingsKings XI Punjab226202
981019Royal Challengers BangaloreSunrisers Hyderabad208200
\n", + "
" + ], + "text/plain": [ + " team1 team2 \\\n", + "match_code \n", + "335983 Kings XI Punjab Chennai Super Kings \n", + "335989 Chennai Super Kings Mumbai Indians \n", + "335990 Deccan Chargers Rajasthan Royals \n", + "336033 Chennai Super Kings Rajasthan Royals \n", + "419107 Mumbai Indians Rajasthan Royals \n", + "419112 Royal Challengers Bangalore Kings XI Punjab \n", + "419137 Chennai Super Kings Rajasthan Royals \n", + "419139 Kolkata Knight Riders Kings XI Punjab \n", + "501223 Delhi Daredevils Kings XI Punjab \n", + "548318 Chennai Super Kings Royal Challengers Bangalore \n", + "729283 Chennai Super Kings Kings XI Punjab \n", + "734007 Sunrisers Hyderabad Kings XI Punjab \n", + "734047 Chennai Super Kings Kings XI Punjab \n", + "981019 Royal Challengers Bangalore Sunrisers Hyderabad \n", + "\n", + " total_inn1 total_inn2 \n", + "match_code \n", + "335983 240 207 \n", + "335989 208 202 \n", + "335990 214 217 \n", + "336033 211 201 \n", + "419107 212 208 \n", + "419112 203 204 \n", + "419137 246 223 \n", + "419139 200 204 \n", + "501223 231 202 \n", + "548318 205 208 \n", + "729283 205 206 \n", + "734007 205 211 \n", + "734047 226 202 \n", + "981019 208 200 " + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_scores1 = high_scores.loc[high_scores.inning==1,:]\n", + "high_scores2 = high_scores.loc[high_scores.inning==2,:]\n", + "\n", + "\n", + "(high_scores1.set_index('match_code').drop(columns='inning')\n", + " .join(high_scores2.set_index('match_code')[['total']],\n", + " lsuffix='_inn1',\n", + " rsuffix='_inn2',\n", + " how=\"inner\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "42.86" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_score_matches = 
high_scores1.drop(columns='inning').merge(high_scores2[['match_code','total']],on='match_code',suffixes=('_inn1','_inn2'))\n", + "\n", + "high_score_matches['is_score_chased'] = high_score_matches.total_inn2 > high_score_matches.total_inn1\n", + "high_score_matches.is_score_chased.value_counts(normalize=True).multiply(100).round(2)[True]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 8\n", + "True 6\n", + "Name: is_score_chased, dtype: int64" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_score_matches.is_score_chased.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Every season has one team that outperforms the others and is in great form. Which team has the highest win count in each season?\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "year winner \n", + "2008 Rajasthan Royals 13\n", + " Kings XI Punjab 10\n", + " Chennai Super Kings 9\n", + " Delhi Daredevils 7\n", + " Mumbai Indians 7\n", + " ..\n", + "2016 Kolkata Knight Riders 8\n", + " Delhi Daredevils 7\n", + " Mumbai Indians 7\n", + " Rising Pune Supergiants 5\n", + " Kings XI Punjab 4\n", + "Name: winner, Length: 76, dtype: int64" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "match_wise_data = df_ipl.drop_duplicates(subset = 'match_code', keep='first').reset_index(drop=True)\n", + "match_wise_data.groupby('year')['winner'].value_counts(ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://stackoverflow.com/a/22720517/8210613" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "year\n", +
"2008 (Rajasthan Royals, 13)\n", + "2009 (Delhi Daredevils, 10)\n", + "2010 (Mumbai Indians, 11)\n", + "2011 (Chennai Super Kings, 11)\n", + "2012 (Kolkata Knight Riders, 12)\n", + "2013 (Mumbai Indians, 13)\n", + "2014 (Kings XI Punjab, 12)\n", + "2015 (Mumbai Indians, 10)\n", + "2016 (Sunrisers Hyderabad, 11)\n", + "dtype: object" + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(df_ipl\n", + " .drop_duplicates(subset='match_code')\n", + " .reset_index(drop=True)\n", + " .groupby('year')\n", + " .apply(lambda group: (group['winner'].value_counts().index[0],group['winner'].value_counts()[0] ))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://stackoverflow.com/a/10762516/8210613" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
winnercount
year
2008Rajasthan Royals13
2009Delhi Daredevils10
2010Mumbai Indians11
2011Chennai Super Kings11
2012Kolkata Knight Riders12
2013Mumbai Indians13
2014Kings XI Punjab12
2015Mumbai Indians10
2016Sunrisers Hyderabad11
\n", + "
" + ], + "text/plain": [ + " winner count\n", + "year \n", + "2008 Rajasthan Royals 13\n", + "2009 Delhi Daredevils 10\n", + "2010 Mumbai Indians 11\n", + "2011 Chennai Super Kings 11\n", + "2012 Kolkata Knight Riders 12\n", + "2013 Mumbai Indians 13\n", + "2014 Kings XI Punjab 12\n", + "2015 Mumbai Indians 10\n", + "2016 Sunrisers Hyderabad 11" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(df_ipl\n", + " .drop_duplicates(subset='match_code')\n", + " .reset_index(drop=True)\n", + " .groupby('year')\n", + " .apply(lambda group: pd.Series( (group['winner'].value_counts().index[0],group['winner'].value_counts()[0]),\n", + " index=['winner','count']))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
winnercount
year
2008Rajasthan Royals13
2009Delhi Daredevils10
2010Mumbai Indians11
2011Chennai Super Kings11
2012Kolkata Knight Riders12
2013Mumbai Indians13
2014Kings XI Punjab12
2015Mumbai Indians10
2016Sunrisers Hyderabad11
\n", + "
" + ], + "text/plain": [ + " winner count\n", + "year \n", + "2008 Rajasthan Royals 13\n", + "2009 Delhi Daredevils 10\n", + "2010 Mumbai Indians 11\n", + "2011 Chennai Super Kings 11\n", + "2012 Kolkata Knight Riders 12\n", + "2013 Mumbai Indians 13\n", + "2014 Kings XI Punjab 12\n", + "2015 Mumbai Indians 10\n", + "2016 Sunrisers Hyderabad 11" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_max(group):\n", + " vc = group['winner'].value_counts()\n", + " return pd.Series([vc.index[0], vc[0]], index=['winner', 'count'])\n", + " \n", + "(df_ipl\n", + " .drop_duplicates(subset='match_code')\n", + " .reset_index(drop=True)\n", + " .groupby('year')\n", + " .apply(get_max)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Indian_rainfall_analysis/notebook/Indian_rainfall_analysis-MK.ipynb b/Indian_rainfall_analysis/notebook/Indian_rainfall_analysis-MK.ipynb new file mode 100644 index 0000000..663ff11 --- /dev/null +++ b/Indian_rainfall_analysis/notebook/Indian_rainfall_analysis-MK.ipynb @@ -0,0 +1,2581 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Indian Rainfall Analysis\n", + "\n", + "The 2019 Indian floods were a series of floods that affected over thirteen states in late July and early August 2019, due to incessant rains. At least 200 people died and about a million people were displaced. 
Karnataka and Maharashtra were the most severely affected states.\n", + "\n", + "It was the heaviest monsoon in the last 25 years. More than 1600 people died between June and October 2019.\n", + "\n", + "India being an agriculturally driven economy, it will be interesting to study the rainfall in India in the past decade to give us an idea of the changes in the pattern if there are any.\n", + "\n", + "\n", + "Source: [Open Gov Data Platform India - data.gov.in](https://data.gov.in/resources/subdivision-wise-rainfall-and-its-departure-1901-2015)\n", + "\n", + "Let us work on the INDIAN RAINFALL DATA!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SUBDIVISIONYEARJANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDECANNUALJan-FebMar-MayJun-SepOct-Dec
0ANDAMAN & NICOBAR ISLANDS190149.287.129.22.3528.8517.5365.1481.1332.6388.5558.233.63373.2136.3560.31696.3980.3
1ANDAMAN & NICOBAR ISLANDS19020.0159.812.20.0446.1537.1228.9753.7666.2197.2359.0160.53520.7159.8458.32185.9716.7
2ANDAMAN & NICOBAR ISLANDS190312.7144.00.01.0235.1479.9728.4326.7339.0181.2284.4225.02957.4156.7236.11874.0690.6
3ANDAMAN & NICOBAR ISLANDS19049.414.70.0202.4304.5495.1502.0160.1820.4222.2308.740.13079.624.1506.91977.6571.0
4ANDAMAN & NICOBAR ISLANDS19051.30.03.326.9279.5628.7368.7330.5297.0260.725.4344.72566.71.3309.71624.9630.8
\n", + "
" + ], + "text/plain": [ + " SUBDIVISION YEAR JAN FEB MAR APR MAY JUN \\\n", + "0 ANDAMAN & NICOBAR ISLANDS 1901 49.2 87.1 29.2 2.3 528.8 517.5 \n", + "1 ANDAMAN & NICOBAR ISLANDS 1902 0.0 159.8 12.2 0.0 446.1 537.1 \n", + "2 ANDAMAN & NICOBAR ISLANDS 1903 12.7 144.0 0.0 1.0 235.1 479.9 \n", + "3 ANDAMAN & NICOBAR ISLANDS 1904 9.4 14.7 0.0 202.4 304.5 495.1 \n", + "4 ANDAMAN & NICOBAR ISLANDS 1905 1.3 0.0 3.3 26.9 279.5 628.7 \n", + "\n", + " JUL AUG SEP OCT NOV DEC ANNUAL Jan-Feb Mar-May \\\n", + "0 365.1 481.1 332.6 388.5 558.2 33.6 3373.2 136.3 560.3 \n", + "1 228.9 753.7 666.2 197.2 359.0 160.5 3520.7 159.8 458.3 \n", + "2 728.4 326.7 339.0 181.2 284.4 225.0 2957.4 156.7 236.1 \n", + "3 502.0 160.1 820.4 222.2 308.7 40.1 3079.6 24.1 506.9 \n", + "4 368.7 330.5 297.0 260.7 25.4 344.7 2566.7 1.3 309.7 \n", + "\n", + " Jun-Sep Oct-Dec \n", + "0 1696.3 980.3 \n", + "1 2185.9 716.7 \n", + "2 1874.0 690.6 \n", + "3 1977.6 571.0 \n", + "4 1624.9 630.8 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('../data/rainfall in india 1901-2015.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary of the data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 4116 entries, 0 to 4115\n", + "Data columns (total 19 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 SUBDIVISION 4116 non-null object \n", + " 1 YEAR 4116 non-null int64 \n", + " 2 JAN 4112 non-null float64\n", + " 3 FEB 4113 non-null float64\n", + " 4 MAR 4110 non-null float64\n", + " 5 APR 4112 non-null float64\n", + " 6 MAY 4113 non-null float64\n", + " 7 JUN 4111 non-null float64\n", + " 8 JUL 4109 non-null float64\n", + " 9 AUG 4112 non-null float64\n", + " 10 SEP 4110 non-null float64\n", + " 11 OCT 
4109 non-null float64\n", + " 12 NOV 4105 non-null float64\n", + " 13 DEC 4106 non-null float64\n", + " 14 ANNUAL 4090 non-null float64\n", + " 15 Jan-Feb 4110 non-null float64\n", + " 16 Mar-May 4107 non-null float64\n", + " 17 Jun-Sep 4106 non-null float64\n", + " 18 Oct-Dec 4103 non-null float64\n", + "dtypes: float64(17), int64(1), object(1)\n", + "memory usage: 611.1+ KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SUBDIVISION 0\n", + "YEAR 0\n", + "JAN 4\n", + "FEB 3\n", + "MAR 6\n", + "APR 4\n", + "MAY 3\n", + "JUN 5\n", + "JUL 7\n", + "AUG 4\n", + "SEP 6\n", + "OCT 7\n", + "NOV 11\n", + "DEC 10\n", + "ANNUAL 26\n", + "Jan-Feb 6\n", + "Mar-May 9\n", + "Jun-Sep 10\n", + "Oct-Dec 13\n", + "dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finding years where data for all months is missing" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Series([], Name: YEAR, dtype: int64)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.YEAR[data.iloc[:,2:14].isnull().all(axis=1)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There is no year (or row) in the dataset where all the measurements from Jan-Dec are missing" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# we can handle missing values in the ANNUAL column by summing the monthly values ourselves\n", + "data['MY_ANNUAL'] = data.iloc[:,2:14].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"SUBDIVISION 0\n", +
"YEAR 0\n", + "JAN 4\n", + "FEB 3\n", + "MAR 6\n", + "APR 4\n", + "MAY 3\n", + "JUN 5\n", + "JUL 7\n", + "AUG 4\n", + "SEP 6\n", + "OCT 7\n", + "NOV 11\n", + "DEC 10\n", + "ANNUAL 26\n", + "Jan-Feb 6\n", + "Mar-May 9\n", + "Jun-Sep 10\n", + "Oct-Dec 13\n", + "MY_ANNUAL 0\n", + "dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, a better way in this analysis would be to replace it mean." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect the data, find missing values and replace them with appropriate values" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YEARJANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDECANNUALJan-FebMar-MayJun-SepOct-DecMY_ANNUAL
SUBDIVISION
ANDAMAN & NICOBAR ISLANDS1958.91818252.63727327.99454531.82407472.223148357.056881471.580556400.042593400.047222439.482243290.264815233.744444153.1448602927.43942380.632727462.2495331706.687850675.4168222876.093636
ARUNACHAL PRADESH1965.82474247.29791791.116667153.527368263.836082358.522680647.373958694.544792495.229897432.134021194.68631635.69684224.5021053418.857143138.416667777.6863162271.422105254.5138303414.786598
ASSAM & MEGHALAYA1958.00000016.97478331.44173979.026957203.115652341.539130510.161739495.102609404.593043310.734783152.11826126.9382618.9513042580.69565248.413043623.6878261720.590435188.0156522580.698261
BIHAR1958.00000013.38608714.39391310.12434816.91826153.081739174.315652324.441739299.643478217.38434863.0747837.1782613.6947831197.63391327.77652280.1269571015.78608773.9539131197.637391
CHHATTISGARH1958.00000014.20695719.25913015.26695716.77304321.048696198.266087398.577391389.873043217.78000063.66000011.7721745.2486961371.72869633.46260953.0921741204.50087080.6747831371.732174
COASTAL ANDHRA PRADESH1958.0000007.48347812.92347813.22173926.74087062.549565123.693913173.824348175.923478181.707826185.51130477.90347811.4200001052.90434820.404348102.515652655.141739274.8356521052.903478
COASTAL KARNATAKA1958.0000001.9377191.5182616.35739130.916522122.787826841.3260871127.028696713.618261299.652174184.55217463.60782612.6139133408.4096493.371053160.0513042981.618261260.7756523405.900000
EAST MADHYA PRADESH1958.00000019.40173918.69391313.6373917.1886969.273043141.029565371.378261369.368696194.23652239.68608712.7052178.4043481205.00000038.09478330.0965221076.01826160.8000001205.003478
EAST RAJASTHAN1958.0000006.4226095.4173914.5165223.1443489.82000063.399130223.347826218.27739197.97826114.3608704.8739133.651304655.21565211.83739117.487826602.99826122.887826655.209565
EAST UTTAR PRADESH1958.00000016.01217415.8739138.9078266.43043517.211304110.712174290.568696275.613913184.59130442.9208704.5904355.776522979.21304331.88782632.553913861.48608753.293913979.209565
GANGETIC WEST BENGAL1958.00000012.59565222.45217429.09043544.885217107.787826247.196522326.377391311.382609245.710435115.74608721.5791305.6904351490.48782635.042609181.7660871130.657391143.0182611490.493913
GUJARAT REGION1958.0000001.7860871.1913041.2208701.1165225.809565121.284348348.920870259.193043148.84173920.5652176.9286961.339130918.2304352.9773918.161739878.25130428.834783918.197391
HARYANA DELHI & CHANDIGARH1958.00000016.88956517.43391312.9356527.63391314.53391348.626087150.015652150.84087088.30695712.8234783.2643487.186087530.49652234.33130435.112174437.78695723.270435530.490435
HIMACHAL PRADESH1958.00000084.18956590.894783101.14608762.42869658.15652291.220870280.284348273.933043130.21913031.27826116.69565239.8930431260.345217175.082609221.726957775.66434887.8713041260.340000
JAMMU & KASHMIR1958.000000102.030435115.450435131.37826193.70260967.47652264.234783179.837719180.97304389.28956534.16695724.13333355.4254391139.684211217.482609292.552174515.428070113.9596491135.843478
JHARKHAND1958.00000017.62173924.18608718.42347819.36695748.317391194.588696336.975652325.524348227.42173980.01565211.9234784.9391301309.30347841.80956586.0956521084.51043596.8808701309.304348
KERALA1958.00000012.24695715.49652236.814783110.573913229.881739654.302609700.953043421.977391245.619130294.122609163.56000039.9504352925.48782627.739130377.2539132022.840870497.6365222925.499130
KONKAN & GOA1958.0000001.2626090.5469571.3747834.26608733.515652688.5695651073.030435682.756522349.780000113.38695724.6713044.5165222977.6860871.81304339.1617392794.130435142.5791302977.677391
LAKSHADWEEP1958.35087727.49464315.83451314.35089345.163393163.893750327.627679281.928829207.993750163.170270166.727928124.84074160.8109091590.88640842.500000223.822727983.554545355.3870371561.094737
MADHYA MAHARASHTRA1958.0000003.0547831.4678263.5965229.14695722.943478147.426087248.980000184.397391157.22173970.19478325.9452175.848696880.2330434.52608735.692174738.025217101.986087880.223478
MATATHWADA1958.0000005.0008704.4434787.1052177.59478315.646957136.957391180.648696166.484348178.47652258.58000022.4365227.302609790.6921749.44869630.352174662.56782688.319130790.677391
NAGA MANI MIZO TRIPURA1958.00000014.02521736.65217477.199130170.733043290.839130445.633913438.684348411.281739314.350435175.00608746.83391312.3991302433.61913050.669565538.7695651609.941739234.2400002433.638261
NORTH INTERIOR KARNATAKA1958.0000003.0130433.1721747.12347824.30087047.035652100.993043138.531304119.459130142.94087095.68869629.2078266.327826717.7956526.18434878.460870501.927826131.223478717.793913
ORISSA1958.00000012.32956519.71913021.13478334.16000064.886087210.860870351.173043355.382609241.403478113.59217427.9617395.5678261458.16956532.047826120.1843481158.817391147.1226091458.171304
PUNJAB1958.00000025.24608726.78695723.65130412.66000014.13652246.466957168.963478158.16782686.78956513.8365224.14000012.694783593.53565252.03043550.440000460.39217430.669565593.540000
RAYALSEEMA1958.0000009.8678265.6800008.07652219.80869650.47565264.74260996.081739107.511304131.720000135.327826102.65391334.260000766.20608715.54521778.358261400.058261272.241739766.206087
SAURASHTRA & KUTCH1958.0000001.1391301.6156521.2965221.1834784.66260974.371304194.970435118.77043575.41826114.5104356.0965221.108696495.1617392.7521747.140870463.53652221.718261495.143478
SOUTH INTERIOR KARNATAKA1958.0000002.9286964.1634789.48521742.28087092.100000141.417391231.359130174.239130137.313913139.14347854.43130411.5173911040.3913047.087826143.861739684.338261205.0939131040.380000
SUB HIMALAYAN WEST BENGAL & SIKKIM1958.00000014.08347822.97478343.135652110.681739269.143478537.881739646.402609520.763478421.341739143.64608716.0886966.0600002752.21739137.060870422.9686962126.391304165.8017392752.203478
TAMIL NADU1958.00000023.81913013.42260919.47565244.99565269.92087052.05652271.31478395.887826111.597391183.196522176.90347881.137391943.71304337.239130134.386087330.847826441.234783943.727826
TELANGANA1958.0000007.7026099.68869612.61478318.18521725.373913142.126087247.499130215.059130175.50347874.22695720.2504355.141739953.37826117.39652256.173043780.18695799.620870953.372174
UTTARAKHAND1958.00000053.79739163.45217457.27217435.16608755.338261162.551304390.698261382.023478196.09652239.0739138.18782622.0356521465.696522117.251304147.7739131131.36782669.3060871465.693043
VIDARBHA1958.00000010.56347811.98260911.8721749.43565211.551304173.578261329.428696285.949565175.44956552.14869615.5747837.9278261095.45913022.54434832.851304964.40347875.6547831095.462609
WEST MADHYA PRADESH1958.0000009.2417396.3078955.1730432.3756527.657391111.781739302.982609288.108696161.16869628.08695712.3408706.296522944.35877215.63859615.217391864.04347846.724348941.466957
WEST RAJASTHAN1958.0000003.3278264.9304353.9860873.5713049.44347828.63739195.17130494.55565240.3426095.1278261.6669571.902609292.6730438.25565217.006087258.7078268.700870292.663478
WEST UTTAR PRADESH1958.00000017.66608717.89391311.4617396.25304312.30608777.597391246.520000251.299130146.25478328.7773913.9660877.114783827.11478335.55478330.026087721.67652239.858261827.110435
\n", + "
" + ], + "text/plain": [ + " YEAR JAN FEB \\\n", + "SUBDIVISION \n", + "ANDAMAN & NICOBAR ISLANDS 1958.918182 52.637273 27.994545 \n", + "ARUNACHAL PRADESH 1965.824742 47.297917 91.116667 \n", + "ASSAM & MEGHALAYA 1958.000000 16.974783 31.441739 \n", + "BIHAR 1958.000000 13.386087 14.393913 \n", + "CHHATTISGARH 1958.000000 14.206957 19.259130 \n", + "COASTAL ANDHRA PRADESH 1958.000000 7.483478 12.923478 \n", + "COASTAL KARNATAKA 1958.000000 1.937719 1.518261 \n", + "EAST MADHYA PRADESH 1958.000000 19.401739 18.693913 \n", + "EAST RAJASTHAN 1958.000000 6.422609 5.417391 \n", + "EAST UTTAR PRADESH 1958.000000 16.012174 15.873913 \n", + "GANGETIC WEST BENGAL 1958.000000 12.595652 22.452174 \n", + "GUJARAT REGION 1958.000000 1.786087 1.191304 \n", + "HARYANA DELHI & CHANDIGARH 1958.000000 16.889565 17.433913 \n", + "HIMACHAL PRADESH 1958.000000 84.189565 90.894783 \n", + "JAMMU & KASHMIR 1958.000000 102.030435 115.450435 \n", + "JHARKHAND 1958.000000 17.621739 24.186087 \n", + "KERALA 1958.000000 12.246957 15.496522 \n", + "KONKAN & GOA 1958.000000 1.262609 0.546957 \n", + "LAKSHADWEEP 1958.350877 27.494643 15.834513 \n", + "MADHYA MAHARASHTRA 1958.000000 3.054783 1.467826 \n", + "MATATHWADA 1958.000000 5.000870 4.443478 \n", + "NAGA MANI MIZO TRIPURA 1958.000000 14.025217 36.652174 \n", + "NORTH INTERIOR KARNATAKA 1958.000000 3.013043 3.172174 \n", + "ORISSA 1958.000000 12.329565 19.719130 \n", + "PUNJAB 1958.000000 25.246087 26.786957 \n", + "RAYALSEEMA 1958.000000 9.867826 5.680000 \n", + "SAURASHTRA & KUTCH 1958.000000 1.139130 1.615652 \n", + "SOUTH INTERIOR KARNATAKA 1958.000000 2.928696 4.163478 \n", + "SUB HIMALAYAN WEST BENGAL & SIKKIM 1958.000000 14.083478 22.974783 \n", + "TAMIL NADU 1958.000000 23.819130 13.422609 \n", + "TELANGANA 1958.000000 7.702609 9.688696 \n", + "UTTARAKHAND 1958.000000 53.797391 63.452174 \n", + "VIDARBHA 1958.000000 10.563478 11.982609 \n", + "WEST MADHYA PRADESH 1958.000000 9.241739 6.307895 \n", + "WEST RAJASTHAN 1958.000000 
3.327826 4.930435 \n", + "WEST UTTAR PRADESH 1958.000000 17.666087 17.893913 \n", + "\n", + " MAR APR MAY \\\n", + "SUBDIVISION \n", + "ANDAMAN & NICOBAR ISLANDS 31.824074 72.223148 357.056881 \n", + "ARUNACHAL PRADESH 153.527368 263.836082 358.522680 \n", + "ASSAM & MEGHALAYA 79.026957 203.115652 341.539130 \n", + "BIHAR 10.124348 16.918261 53.081739 \n", + "CHHATTISGARH 15.266957 16.773043 21.048696 \n", + "COASTAL ANDHRA PRADESH 13.221739 26.740870 62.549565 \n", + "COASTAL KARNATAKA 6.357391 30.916522 122.787826 \n", + "EAST MADHYA PRADESH 13.637391 7.188696 9.273043 \n", + "EAST RAJASTHAN 4.516522 3.144348 9.820000 \n", + "EAST UTTAR PRADESH 8.907826 6.430435 17.211304 \n", + "GANGETIC WEST BENGAL 29.090435 44.885217 107.787826 \n", + "GUJARAT REGION 1.220870 1.116522 5.809565 \n", + "HARYANA DELHI & CHANDIGARH 12.935652 7.633913 14.533913 \n", + "HIMACHAL PRADESH 101.146087 62.428696 58.156522 \n", + "JAMMU & KASHMIR 131.378261 93.702609 67.476522 \n", + "JHARKHAND 18.423478 19.366957 48.317391 \n", + "KERALA 36.814783 110.573913 229.881739 \n", + "KONKAN & GOA 1.374783 4.266087 33.515652 \n", + "LAKSHADWEEP 14.350893 45.163393 163.893750 \n", + "MADHYA MAHARASHTRA 3.596522 9.146957 22.943478 \n", + "MATATHWADA 7.105217 7.594783 15.646957 \n", + "NAGA MANI MIZO TRIPURA 77.199130 170.733043 290.839130 \n", + "NORTH INTERIOR KARNATAKA 7.123478 24.300870 47.035652 \n", + "ORISSA 21.134783 34.160000 64.886087 \n", + "PUNJAB 23.651304 12.660000 14.136522 \n", + "RAYALSEEMA 8.076522 19.808696 50.475652 \n", + "SAURASHTRA & KUTCH 1.296522 1.183478 4.662609 \n", + "SOUTH INTERIOR KARNATAKA 9.485217 42.280870 92.100000 \n", + "SUB HIMALAYAN WEST BENGAL & SIKKIM 43.135652 110.681739 269.143478 \n", + "TAMIL NADU 19.475652 44.995652 69.920870 \n", + "TELANGANA 12.614783 18.185217 25.373913 \n", + "UTTARAKHAND 57.272174 35.166087 55.338261 \n", + "VIDARBHA 11.872174 9.435652 11.551304 \n", + "WEST MADHYA PRADESH 5.173043 2.375652 7.657391 \n", + "WEST RAJASTHAN 3.986087 
3.571304 9.443478 \n", + "WEST UTTAR PRADESH 11.461739 6.253043 12.306087 \n", + "\n", + " JUN JUL AUG \\\n", + "SUBDIVISION \n", + "ANDAMAN & NICOBAR ISLANDS 471.580556 400.042593 400.047222 \n", + "ARUNACHAL PRADESH 647.373958 694.544792 495.229897 \n", + "ASSAM & MEGHALAYA 510.161739 495.102609 404.593043 \n", + "BIHAR 174.315652 324.441739 299.643478 \n", + "CHHATTISGARH 198.266087 398.577391 389.873043 \n", + "COASTAL ANDHRA PRADESH 123.693913 173.824348 175.923478 \n", + "COASTAL KARNATAKA 841.326087 1127.028696 713.618261 \n", + "EAST MADHYA PRADESH 141.029565 371.378261 369.368696 \n", + "EAST RAJASTHAN 63.399130 223.347826 218.277391 \n", + "EAST UTTAR PRADESH 110.712174 290.568696 275.613913 \n", + "GANGETIC WEST BENGAL 247.196522 326.377391 311.382609 \n", + "GUJARAT REGION 121.284348 348.920870 259.193043 \n", + "HARYANA DELHI & CHANDIGARH 48.626087 150.015652 150.840870 \n", + "HIMACHAL PRADESH 91.220870 280.284348 273.933043 \n", + "JAMMU & KASHMIR 64.234783 179.837719 180.973043 \n", + "JHARKHAND 194.588696 336.975652 325.524348 \n", + "KERALA 654.302609 700.953043 421.977391 \n", + "KONKAN & GOA 688.569565 1073.030435 682.756522 \n", + "LAKSHADWEEP 327.627679 281.928829 207.993750 \n", + "MADHYA MAHARASHTRA 147.426087 248.980000 184.397391 \n", + "MATATHWADA 136.957391 180.648696 166.484348 \n", + "NAGA MANI MIZO TRIPURA 445.633913 438.684348 411.281739 \n", + "NORTH INTERIOR KARNATAKA 100.993043 138.531304 119.459130 \n", + "ORISSA 210.860870 351.173043 355.382609 \n", + "PUNJAB 46.466957 168.963478 158.167826 \n", + "RAYALSEEMA 64.742609 96.081739 107.511304 \n", + "SAURASHTRA & KUTCH 74.371304 194.970435 118.770435 \n", + "SOUTH INTERIOR KARNATAKA 141.417391 231.359130 174.239130 \n", + "SUB HIMALAYAN WEST BENGAL & SIKKIM 537.881739 646.402609 520.763478 \n", + "TAMIL NADU 52.056522 71.314783 95.887826 \n", + "TELANGANA 142.126087 247.499130 215.059130 \n", + "UTTARAKHAND 162.551304 390.698261 382.023478 \n", + "VIDARBHA 173.578261 329.428696 
285.949565 \n", + "WEST MADHYA PRADESH 111.781739 302.982609 288.108696 \n", + "WEST RAJASTHAN 28.637391 95.171304 94.555652 \n", + "WEST UTTAR PRADESH 77.597391 246.520000 251.299130 \n", + "\n", + " SEP OCT NOV \\\n", + "SUBDIVISION \n", + "ANDAMAN & NICOBAR ISLANDS 439.482243 290.264815 233.744444 \n", + "ARUNACHAL PRADESH 432.134021 194.686316 35.696842 \n", + "ASSAM & MEGHALAYA 310.734783 152.118261 26.938261 \n", + "BIHAR 217.384348 63.074783 7.178261 \n", + "CHHATTISGARH 217.780000 63.660000 11.772174 \n", + "COASTAL ANDHRA PRADESH 181.707826 185.511304 77.903478 \n", + "COASTAL KARNATAKA 299.652174 184.552174 63.607826 \n", + "EAST MADHYA PRADESH 194.236522 39.686087 12.705217 \n", + "EAST RAJASTHAN 97.978261 14.360870 4.873913 \n", + "EAST UTTAR PRADESH 184.591304 42.920870 4.590435 \n", + "GANGETIC WEST BENGAL 245.710435 115.746087 21.579130 \n", + "GUJARAT REGION 148.841739 20.565217 6.928696 \n", + "HARYANA DELHI & CHANDIGARH 88.306957 12.823478 3.264348 \n", + "HIMACHAL PRADESH 130.219130 31.278261 16.695652 \n", + "JAMMU & KASHMIR 89.289565 34.166957 24.133333 \n", + "JHARKHAND 227.421739 80.015652 11.923478 \n", + "KERALA 245.619130 294.122609 163.560000 \n", + "KONKAN & GOA 349.780000 113.386957 24.671304 \n", + "LAKSHADWEEP 163.170270 166.727928 124.840741 \n", + "MADHYA MAHARASHTRA 157.221739 70.194783 25.945217 \n", + "MATATHWADA 178.476522 58.580000 22.436522 \n", + "NAGA MANI MIZO TRIPURA 314.350435 175.006087 46.833913 \n", + "NORTH INTERIOR KARNATAKA 142.940870 95.688696 29.207826 \n", + "ORISSA 241.403478 113.592174 27.961739 \n", + "PUNJAB 86.789565 13.836522 4.140000 \n", + "RAYALSEEMA 131.720000 135.327826 102.653913 \n", + "SAURASHTRA & KUTCH 75.418261 14.510435 6.096522 \n", + "SOUTH INTERIOR KARNATAKA 137.313913 139.143478 54.431304 \n", + "SUB HIMALAYAN WEST BENGAL & SIKKIM 421.341739 143.646087 16.088696 \n", + "TAMIL NADU 111.597391 183.196522 176.903478 \n", + "TELANGANA 175.503478 74.226957 20.250435 \n", + "UTTARAKHAND 196.096522 
39.073913 8.187826 \n", + "VIDARBHA 175.449565 52.148696 15.574783 \n", + "WEST MADHYA PRADESH 161.168696 28.086957 12.340870 \n", + "WEST RAJASTHAN 40.342609 5.127826 1.666957 \n", + "WEST UTTAR PRADESH 146.254783 28.777391 3.966087 \n", + "\n", + " DEC ANNUAL Jan-Feb \\\n", + "SUBDIVISION \n", + "ANDAMAN & NICOBAR ISLANDS 153.144860 2927.439423 80.632727 \n", + "ARUNACHAL PRADESH 24.502105 3418.857143 138.416667 \n", + "ASSAM & MEGHALAYA 8.951304 2580.695652 48.413043 \n", + "BIHAR 3.694783 1197.633913 27.776522 \n", + "CHHATTISGARH 5.248696 1371.728696 33.462609 \n", + "COASTAL ANDHRA PRADESH 11.420000 1052.904348 20.404348 \n", + "COASTAL KARNATAKA 12.613913 3408.409649 3.371053 \n", + "EAST MADHYA PRADESH 8.404348 1205.000000 38.094783 \n", + "EAST RAJASTHAN 3.651304 655.215652 11.837391 \n", + "EAST UTTAR PRADESH 5.776522 979.213043 31.887826 \n", + "GANGETIC WEST BENGAL 5.690435 1490.487826 35.042609 \n", + "GUJARAT REGION 1.339130 918.230435 2.977391 \n", + "HARYANA DELHI & CHANDIGARH 7.186087 530.496522 34.331304 \n", + "HIMACHAL PRADESH 39.893043 1260.345217 175.082609 \n", + "JAMMU & KASHMIR 55.425439 1139.684211 217.482609 \n", + "JHARKHAND 4.939130 1309.303478 41.809565 \n", + "KERALA 39.950435 2925.487826 27.739130 \n", + "KONKAN & GOA 4.516522 2977.686087 1.813043 \n", + "LAKSHADWEEP 60.810909 1590.886408 42.500000 \n", + "MADHYA MAHARASHTRA 5.848696 880.233043 4.526087 \n", + "MATATHWADA 7.302609 790.692174 9.448696 \n", + "NAGA MANI MIZO TRIPURA 12.399130 2433.619130 50.669565 \n", + "NORTH INTERIOR KARNATAKA 6.327826 717.795652 6.184348 \n", + "ORISSA 5.567826 1458.169565 32.047826 \n", + "PUNJAB 12.694783 593.535652 52.030435 \n", + "RAYALSEEMA 34.260000 766.206087 15.545217 \n", + "SAURASHTRA & KUTCH 1.108696 495.161739 2.752174 \n", + "SOUTH INTERIOR KARNATAKA 11.517391 1040.391304 7.087826 \n", + "SUB HIMALAYAN WEST BENGAL & SIKKIM 6.060000 2752.217391 37.060870 \n", + "TAMIL NADU 81.137391 943.713043 37.239130 \n", + "TELANGANA 5.141739 
953.378261 17.396522 \n", + "UTTARAKHAND 22.035652 1465.696522 117.251304 \n", + "VIDARBHA 7.927826 1095.459130 22.544348 \n", + "WEST MADHYA PRADESH 6.296522 944.358772 15.638596 \n", + "WEST RAJASTHAN 1.902609 292.673043 8.255652 \n", + "WEST UTTAR PRADESH 7.114783 827.114783 35.554783 \n", + "\n", + " Mar-May Jun-Sep Oct-Dec \\\n", + "SUBDIVISION \n", + "ANDAMAN & NICOBAR ISLANDS 462.249533 1706.687850 675.416822 \n", + "ARUNACHAL PRADESH 777.686316 2271.422105 254.513830 \n", + "ASSAM & MEGHALAYA 623.687826 1720.590435 188.015652 \n", + "BIHAR 80.126957 1015.786087 73.953913 \n", + "CHHATTISGARH 53.092174 1204.500870 80.674783 \n", + "COASTAL ANDHRA PRADESH 102.515652 655.141739 274.835652 \n", + "COASTAL KARNATAKA 160.051304 2981.618261 260.775652 \n", + "EAST MADHYA PRADESH 30.096522 1076.018261 60.800000 \n", + "EAST RAJASTHAN 17.487826 602.998261 22.887826 \n", + "EAST UTTAR PRADESH 32.553913 861.486087 53.293913 \n", + "GANGETIC WEST BENGAL 181.766087 1130.657391 143.018261 \n", + "GUJARAT REGION 8.161739 878.251304 28.834783 \n", + "HARYANA DELHI & CHANDIGARH 35.112174 437.786957 23.270435 \n", + "HIMACHAL PRADESH 221.726957 775.664348 87.871304 \n", + "JAMMU & KASHMIR 292.552174 515.428070 113.959649 \n", + "JHARKHAND 86.095652 1084.510435 96.880870 \n", + "KERALA 377.253913 2022.840870 497.636522 \n", + "KONKAN & GOA 39.161739 2794.130435 142.579130 \n", + "LAKSHADWEEP 223.822727 983.554545 355.387037 \n", + "MADHYA MAHARASHTRA 35.692174 738.025217 101.986087 \n", + "MATATHWADA 30.352174 662.567826 88.319130 \n", + "NAGA MANI MIZO TRIPURA 538.769565 1609.941739 234.240000 \n", + "NORTH INTERIOR KARNATAKA 78.460870 501.927826 131.223478 \n", + "ORISSA 120.184348 1158.817391 147.122609 \n", + "PUNJAB 50.440000 460.392174 30.669565 \n", + "RAYALSEEMA 78.358261 400.058261 272.241739 \n", + "SAURASHTRA & KUTCH 7.140870 463.536522 21.718261 \n", + "SOUTH INTERIOR KARNATAKA 143.861739 684.338261 205.093913 \n", + "SUB HIMALAYAN WEST BENGAL & SIKKIM 422.968696 
2126.391304 165.801739 \n", + "TAMIL NADU 134.386087 330.847826 441.234783 \n", + "TELANGANA 56.173043 780.186957 99.620870 \n", + "UTTARAKHAND 147.773913 1131.367826 69.306087 \n", + "VIDARBHA 32.851304 964.403478 75.654783 \n", + "WEST MADHYA PRADESH 15.217391 864.043478 46.724348 \n", + "WEST RAJASTHAN 17.006087 258.707826 8.700870 \n", + "WEST UTTAR PRADESH 30.026087 721.676522 39.858261 \n", + "\n", + " MY_ANNUAL \n", + "SUBDIVISION \n", + "ANDAMAN & NICOBAR ISLANDS 2876.093636 \n", + "ARUNACHAL PRADESH 3414.786598 \n", + "ASSAM & MEGHALAYA 2580.698261 \n", + "BIHAR 1197.637391 \n", + "CHHATTISGARH 1371.732174 \n", + "COASTAL ANDHRA PRADESH 1052.903478 \n", + "COASTAL KARNATAKA 3405.900000 \n", + "EAST MADHYA PRADESH 1205.003478 \n", + "EAST RAJASTHAN 655.209565 \n", + "EAST UTTAR PRADESH 979.209565 \n", + "GANGETIC WEST BENGAL 1490.493913 \n", + "GUJARAT REGION 918.197391 \n", + "HARYANA DELHI & CHANDIGARH 530.490435 \n", + "HIMACHAL PRADESH 1260.340000 \n", + "JAMMU & KASHMIR 1135.843478 \n", + "JHARKHAND 1309.304348 \n", + "KERALA 2925.499130 \n", + "KONKAN & GOA 2977.677391 \n", + "LAKSHADWEEP 1561.094737 \n", + "MADHYA MAHARASHTRA 880.223478 \n", + "MATATHWADA 790.677391 \n", + "NAGA MANI MIZO TRIPURA 2433.638261 \n", + "NORTH INTERIOR KARNATAKA 717.793913 \n", + "ORISSA 1458.171304 \n", + "PUNJAB 593.540000 \n", + "RAYALSEEMA 766.206087 \n", + "SAURASHTRA & KUTCH 495.143478 \n", + "SOUTH INTERIOR KARNATAKA 1040.380000 \n", + "SUB HIMALAYAN WEST BENGAL & SIKKIM 2752.203478 \n", + "TAMIL NADU 943.727826 \n", + "TELANGANA 953.372174 \n", + "UTTARAKHAND 1465.693043 \n", + "VIDARBHA 1095.462609 \n", + "WEST MADHYA PRADESH 941.466957 \n", + "WEST RAJASTHAN 292.663478 \n", + "WEST UTTAR PRADESH 827.110435 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## We can either replace subdivision means or means across all subdivision\n", + "data.groupby('SUBDIVISION').mean()" + ] + }, + { + "cell_type": 
"code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YEARJANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDECANNUALJan-FebMar-MayJun-SepOct-DecMY_ANNUAL
0190149.287.129.22.3528.8517.5365.1481.1332.6388.5558.233.63373.2136.3560.31696.3980.33373.2
119020.0159.812.20.0446.1537.1228.9753.7666.2197.2359.0160.53520.7159.8458.32185.9716.73520.7
2190312.7144.00.01.0235.1479.9728.4326.7339.0181.2284.4225.02957.4156.7236.11874.0690.62957.4
319049.414.70.0202.4304.5495.1502.0160.1820.4222.2308.740.13079.624.1506.91977.6571.03079.6
419051.30.03.326.9279.5628.7368.7330.5297.0260.725.4344.72566.71.3309.71624.9630.82566.7
............................................................
411120115.12.83.185.9107.2153.6350.2254.0255.2117.4184.314.91533.77.9196.21013.0316.61533.7
4112201219.20.11.676.821.2327.0231.5381.2179.8145.912.48.81405.519.399.61119.5167.11405.5
4113201326.234.437.55.388.3426.2296.4154.4180.072.878.126.71426.360.6131.11057.0177.61426.3
4114201453.216.14.414.957.4244.1116.1466.1132.2169.259.062.31395.069.376.7958.5290.51395.0
411520152.20.53.787.1133.1296.6257.5146.4160.4165.4231.0159.01642.92.7223.9860.9555.41642.9
\n", + "

4116 rows × 19 columns

\n", + "
" + ], + "text/plain": [ + " YEAR JAN FEB MAR APR MAY JUN JUL AUG SEP \\\n", + "0 1901 49.2 87.1 29.2 2.3 528.8 517.5 365.1 481.1 332.6 \n", + "1 1902 0.0 159.8 12.2 0.0 446.1 537.1 228.9 753.7 666.2 \n", + "2 1903 12.7 144.0 0.0 1.0 235.1 479.9 728.4 326.7 339.0 \n", + "3 1904 9.4 14.7 0.0 202.4 304.5 495.1 502.0 160.1 820.4 \n", + "4 1905 1.3 0.0 3.3 26.9 279.5 628.7 368.7 330.5 297.0 \n", + "... ... ... ... ... ... ... ... ... ... ... \n", + "4111 2011 5.1 2.8 3.1 85.9 107.2 153.6 350.2 254.0 255.2 \n", + "4112 2012 19.2 0.1 1.6 76.8 21.2 327.0 231.5 381.2 179.8 \n", + "4113 2013 26.2 34.4 37.5 5.3 88.3 426.2 296.4 154.4 180.0 \n", + "4114 2014 53.2 16.1 4.4 14.9 57.4 244.1 116.1 466.1 132.2 \n", + "4115 2015 2.2 0.5 3.7 87.1 133.1 296.6 257.5 146.4 160.4 \n", + "\n", + " OCT NOV DEC ANNUAL Jan-Feb Mar-May Jun-Sep Oct-Dec \\\n", + "0 388.5 558.2 33.6 3373.2 136.3 560.3 1696.3 980.3 \n", + "1 197.2 359.0 160.5 3520.7 159.8 458.3 2185.9 716.7 \n", + "2 181.2 284.4 225.0 2957.4 156.7 236.1 1874.0 690.6 \n", + "3 222.2 308.7 40.1 3079.6 24.1 506.9 1977.6 571.0 \n", + "4 260.7 25.4 344.7 2566.7 1.3 309.7 1624.9 630.8 \n", + "... ... ... ... ... ... ... ... ... \n", + "4111 117.4 184.3 14.9 1533.7 7.9 196.2 1013.0 316.6 \n", + "4112 145.9 12.4 8.8 1405.5 19.3 99.6 1119.5 167.1 \n", + "4113 72.8 78.1 26.7 1426.3 60.6 131.1 1057.0 177.6 \n", + "4114 169.2 59.0 62.3 1395.0 69.3 76.7 958.5 290.5 \n", + "4115 165.4 231.0 159.0 1642.9 2.7 223.9 860.9 555.4 \n", + "\n", + " MY_ANNUAL \n", + "0 3373.2 \n", + "1 3520.7 \n", + "2 2957.4 \n", + "3 3079.6 \n", + "4 2566.7 \n", + "... ... 
\n", + "4111 1533.7 \n", + "4112 1405.5 \n", + "4113 1426.3 \n", + "4114 1395.0 \n", + "4115 1642.9 \n", + "\n", + "[4116 rows x 19 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby('SUBDIVISION').fillna(data.groupby('SUBDIVISION').mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "YEAR 1958.218659\n", + "JAN 18.957320\n", + "FEB 21.805325\n", + "MAR 27.359197\n", + "APR 43.127432\n", + "MAY 85.745417\n", + "JUN 230.234444\n", + "JUL 347.214334\n", + "AUG 290.263497\n", + "SEP 197.361922\n", + "OCT 95.507009\n", + "NOV 39.866163\n", + "DEC 18.870580\n", + "ANNUAL 1411.008900\n", + "Jan-Feb 40.747786\n", + "Mar-May 155.901753\n", + "Jun-Sep 1064.724769\n", + "Oct-Dec 154.100487\n", + "MY_ANNUAL 1414.379252\n", + "dtype: float64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "## Let's just replace missing values with the mean across all subdivisions, since our aim would be to create a model; \n", + "## however, any imputation like this should happen after the train/test split, using statistics computed on the training set only and then applied to both sets,\n", + "## to avoid data (information) leakage\n", + "\n", + "data.fillna(data.mean(), inplace=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot the mean annual rainfall and note down your observations regarding the same" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "mean_annual_rainfall = (data.groupby('YEAR').ANNUAL.mean())\n", + "#mean_annual_rainfall.name = 'Mean Annual Rainfall'\n", + "mean_annual_rainfall.plot(legend=True, label='Mean Annual Rainfall', figsize=(12,10))\n", + "\n", + "ma10 = mean_annual_rainfall.rolling(10).mean()\n", + "#ma10.name = 'Moving Avg. for last 10 years'\n", + "ma10.plot(legend=True,label='Moving Avg. for last 10 years')\n", + "plt.xlabel('Year',fontsize=20)\n", + "plt.ylabel('Annual Rainfall (in mm)',fontsize=20)\n", + "plt.title('Annual Rainfall in India from year 1901-2015', fontsize=25)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insights:\n", + "* The highest average rainfall in India was recorded in the year 1961. This was because India received multiple cyclones in 1961. The city of Pune was flooded that year, an event remembered as the Panshet Flood.\n", + "\n", + "* The years 1965-66 were twin drought years and there was food scarcity in India. Prime Minister Lal Bahadur Shastri gave the slogan Jai Jawan Jai Kisan to the people of India. This led to the Green Revolution in India, making India a food-surplus country in the coming decades.\n", + "\n", + "* The red line is the 10-year moving average of the rainfall in India. It seems that since the 1960s there has been a slight dip in the rainfall in India. Nowadays, due to global warming, the monsoon season has shortened and we see a more erratic rainfall pattern. This needs more study." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Similarly analyze the seasonal rainfall as per subdivisions and note down your observations" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "(data\n", + " .loc[:,['SUBDIVISION','Jan-Feb','Mar-May', 'Jun-Sep','Oct-Dec']]\n", + " .groupby('SUBDIVISION')\n", + " .mean()\n", + " .sort_values('Jun-Sep')\n", + " .plot.barh(stacked=True,figsize=(16,10), title='Rainfall in subdivision in India'))\n", + "plt.xlabel('Rainfall in mm')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insights:\n", + "\n", + "* From the above graph we can see that the majority of rainfall is received in the months of Jun-Sep, which is the monsoon season. Oct-Dec is the time of the returning (retreating) monsoon. Jan-Feb are the winter months. Mar-May is the time for summer rains.\n", + "\n", + "* Coastal Karnataka, Arunachal Pradesh, Konkan Goa and Kerala receive the highest rainfall.\n", + "\n", + "* Rajasthan, Gujarat, Haryana and Punjab receive low rainfall. Interestingly, Punjab and Haryana have high agricultural output despite low rainfall. Their water requirements are met by rivers and canals.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## With a boxplot, analyze the distribution of rainfall in various states and note down your observations" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(23,10))\n", + "ax=sns.boxplot(y='SUBDIVISION', x='ANNUAL', data=data, width=0.8)\n", + "ax.set_xlabel('Annual Rainfall in mm',fontsize=30)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insights:\n", + "We can see that the subdivision Arunachal Pradesh shows the highest difference between maximum and minimum rainfall received. Coastal Karnataka receives close to 3400 mm of annual rainfall, which is the highest in India. West Rajasthan receives the least amount of rainfall.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analyze the rainfall through years in Kerala and note down your observations" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "Kerala = data.loc[data.SUBDIVISION=='KERALA',:]\n", + "Kerala.groupby(\"YEAR\").ANNUAL.mean().plot(legend=True, label='Kerala')\n", + "mean_annual_rainfall.plot(figsize=(12,8), \n", + " title='Annual rainfall in Kerala compared to the whole of India',\n", + " legend=True, label='India')\n", + "plt.ylabel('Annual rainfall in mm')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "Prior to 2018 Kerala had major flood in the year 1924 which is evident in the data.Contrary to popular belief Kerala received maximum annual rainfall in year 1961(4257 mm) and not 1924(4226 mm).In 2018 Kerala has received 2226.4 mm of rain in the monsoon season.This is 40% more than the average rainfall." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Districts of Kerala](../graph2.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read the district wise rainfall data" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
STATE_UT_NAMEDISTRICTJANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDECANNUALJan-FebMar-MayJun-SepOct-Dec
0ANDAMAN And NICOBAR ISLANDSNICOBAR107.357.965.2117.0358.5295.5285.0271.9354.8326.0315.2250.92805.2165.2540.71207.2892.1
1ANDAMAN And NICOBAR ISLANDSSOUTH ANDAMAN43.726.018.690.5374.4457.2421.3423.1455.6301.2275.8128.33015.769.7483.51757.2705.3
2ANDAMAN And NICOBAR ISLANDSN & M ANDAMAN32.715.98.653.4343.6503.3465.4460.9454.8276.1198.6100.02913.348.6405.61884.4574.7
3ARUNACHAL PRADESHLOHIT42.280.8176.4358.5306.4447.0660.1427.8313.6167.134.129.83043.8123.0841.31848.5231.0
4ARUNACHAL PRADESHEAST SIANG33.379.5105.9216.5323.0738.3990.9711.2568.0206.929.531.74034.7112.8645.43008.4268.1
\n", + "
" + ], + "text/plain": [ + " STATE_UT_NAME DISTRICT JAN FEB MAR APR \\\n", + "0 ANDAMAN And NICOBAR ISLANDS NICOBAR 107.3 57.9 65.2 117.0 \n", + "1 ANDAMAN And NICOBAR ISLANDS SOUTH ANDAMAN 43.7 26.0 18.6 90.5 \n", + "2 ANDAMAN And NICOBAR ISLANDS N & M ANDAMAN 32.7 15.9 8.6 53.4 \n", + "3 ARUNACHAL PRADESH LOHIT 42.2 80.8 176.4 358.5 \n", + "4 ARUNACHAL PRADESH EAST SIANG 33.3 79.5 105.9 216.5 \n", + "\n", + " MAY JUN JUL AUG SEP OCT NOV DEC ANNUAL Jan-Feb \\\n", + "0 358.5 295.5 285.0 271.9 354.8 326.0 315.2 250.9 2805.2 165.2 \n", + "1 374.4 457.2 421.3 423.1 455.6 301.2 275.8 128.3 3015.7 69.7 \n", + "2 343.6 503.3 465.4 460.9 454.8 276.1 198.6 100.0 2913.3 48.6 \n", + "3 306.4 447.0 660.1 427.8 313.6 167.1 34.1 29.8 3043.8 123.0 \n", + "4 323.0 738.3 990.9 711.2 568.0 206.9 29.5 31.7 4034.7 112.8 \n", + "\n", + " Mar-May Jun-Sep Oct-Dec \n", + "0 540.7 1207.2 892.1 \n", + "1 483.5 1757.2 705.3 \n", + "2 405.6 1884.4 574.7 \n", + "3 841.3 1848.5 231.0 \n", + "4 645.4 3008.4 268.1 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Dist = pd.read_csv('../data/district wise rainfall normal.csv')\n", + "Dist.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Annual rainfall in different districts of Kerala" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "Kerala_Dist = Dist.loc[Dist.STATE_UT_NAME=='KERALA',:]\n", + "(Kerala_Dist\n", + " .sort_values('ANNUAL')\n", + " .plot.barh(x='DISTRICT',\n", + " y='ANNUAL', \n", + " title=\"Rainfall in Districts of Kerala\",\n", + " figsize=(12,8),\n", + " legend=False)\n", + ")\n", + "plt.xlabel('Annual Rainfall in mm')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "North west districts are among the highest receiving rainfall places annually. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Find out the districts with least rainfall" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "(Dist\n", + " .sort_values('ANNUAL',ascending=False)\n", + " .tail(10)\n", + " .plot.barh(x='DISTRICT',y='ANNUAL',\n", + " legend=False,\n", + " title='Districts with Minumum Rainfall in India')\n", + ")\n", + "plt.xlabel('Annual Rainfall (in mm)')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## If there are multiple Districts with same name, then we can be explicit by grouping State and District\n", + "(Dist\n", + " .groupby(['STATE_UT_NAME', 'DISTRICT'])\n", + " .ANNUAL.mean()\n", + " .sort_values(ascending=False)\n", + " .tail(10)\n", + " .plot.barh(x='DISTRICT',y='ANNUAL',\n", + " legend=False,\n", + " title='Districts with Minumum Rainfall in India')\n", + ")\n", + "plt.xlabel('Annual Rainfall (in mm)')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "Ladak which is part of Jammu and Kashmir receives 94.6 mm. Ladak and Kargil which receive less rainfall are part of Indian State Jammu and Kashmir.Jaisalmer,Sri Ganganaga and Barmer are part of Rajastan State.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Similarly, find districts with maximum rainfall" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "(Dist\n", + " .groupby(['STATE_UT_NAME', 'DISTRICT'])\n", + " .ANNUAL.mean()\n", + " .sort_values(ascending=True)\n", + " .tail(10)\n", + " .plot.barh(x='DISTRICT',y='ANNUAL',\n", + " legend=False)\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insights:\n", + "Districts from North eastern states - Manipur, Meghalaya, Arunachal Pradesh and also Southern state of Karnataka receive max rainfall " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/San_Fransisco_salary_analysis/notebook/SF_salary_data_analysis-MK.ipynb b/San_Fransisco_salary_analysis/notebook/SF_salary_data_analysis-MK.ipynb new file mode 100644 index 0000000..8682a82 --- /dev/null +++ b/San_Fransisco_salary_analysis/notebook/SF_salary_data_analysis-MK.ipynb @@ -0,0 +1,2056 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Ow2UdpIAXhtq" + }, + "source": [ + "# City Salary Data Analysis\n", + "One way to understand how a city government works is by looking at who it employs and how its employees are compensated. 
This data contains the names, job title, and compensation for San Francisco city employees on an annual basis from 2011 to 2014.\n", + "\n", + "# Exploration Ideas\n", + "\n", + "* How have salaries changed over time between different groups of people?\n", + "* How are base pay, overtime pay, and benefits allocated between different groups?\n", + "* Is there any evidence of pay discrimination based on gender in this dataset?\n", + "* How is budget allocated based on different groups and responsibilities?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "wwkCDpBoX6cy" + }, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "xn6L-niwV9Kp" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "tm2tW8YTYAVw" + }, + "source": [ + "## Read the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ccLpmOX2YDVP" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.6NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.056120.71198306.9NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.69737.0182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", + "
" + ], + "text/plain": [ + " Id EmployeeName JobTitle \\\n", + "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", + "0 167411.18 0.0 400184.25 NaN 567595.43 567595.43 \n", + "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", + "2 212739.13 106088.18 16452.6 NaN 335279.91 335279.91 \n", + "3 77916.0 56120.71 198306.9 NaN 332343.61 332343.61 \n", + "4 134401.6 9737.0 182234.59 NaN 326373.19 326373.19 \n", + "\n", + " Year Notes Agency Status \n", + "0 2011 NaN San Francisco NaN \n", + "1 2011 NaN San Francisco NaN \n", + "2 2011 NaN San Francisco NaN \n", + "3 2011 NaN San Francisco NaN \n", + "4 2011 NaN San Francisco NaN " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries = pd.read_csv('../data/Salaries.csv', low_memory=False)\n", + "salaries.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ONgXgAFbYErb" + }, + "source": [ + "## 1. 
Basic data study" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "swpD4k8uYIe5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 148654 entries, 0 to 148653\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Id 148654 non-null int64 \n", + " 1 EmployeeName 148654 non-null object \n", + " 2 JobTitle 148654 non-null object \n", + " 3 BasePay 148049 non-null object \n", + " 4 OvertimePay 148654 non-null object \n", + " 5 OtherPay 148654 non-null object \n", + " 6 Benefits 112495 non-null object \n", + " 7 TotalPay 148654 non-null float64\n", + " 8 TotalPayBenefits 148654 non-null float64\n", + " 9 Year 148654 non-null int64 \n", + " 10 Notes 0 non-null float64\n", + " 11 Agency 148654 non-null object \n", + " 12 Status 38119 non-null object \n", + "dtypes: float64(3), int64(2), object(8)\n", + "memory usage: 14.7+ MB\n" + ] + } + ], + "source": [ + "salaries.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdTotalPayTotalPayBenefitsYearNotes
count148654.000000148654.000000148654.000000148654.0000000.0
mean74327.50000074768.32197293692.5548112012.522643NaN
std42912.85779550517.00527462793.5334831.117538NaN
min1.000000-618.130000-618.1300002011.000000NaN
25%37164.25000036168.99500044065.6500002012.000000NaN
50%74327.50000071426.61000092404.0900002013.000000NaN
75%111490.750000105839.135000132876.4500002014.000000NaN
max148654.000000567595.430000567595.4300002014.000000NaN
\n", + "
" + ], + "text/plain": [ + " Id TotalPay TotalPayBenefits Year Notes\n", + "count 148654.000000 148654.000000 148654.000000 148654.000000 0.0\n", + "mean 74327.500000 74768.321972 93692.554811 2012.522643 NaN\n", + "std 42912.857795 50517.005274 62793.533483 1.117538 NaN\n", + "min 1.000000 -618.130000 -618.130000 2011.000000 NaN\n", + "25% 37164.250000 36168.995000 44065.650000 2012.000000 NaN\n", + "50% 74327.500000 71426.610000 92404.090000 2013.000000 NaN\n", + "75% 111490.750000 105839.135000 132876.450000 2014.000000 NaN\n", + "max 148654.000000 567595.430000 567595.430000 2014.000000 NaN" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "351Gd6SrYKJp" + }, + "source": [ + "#### What are your observations about the basic statistics of data?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Insight:\n", + "- Data cleaning is needed - the pay columns hold string data, and there are negative salaries\n", + "- `Notes` and `Id` add no value\n", + "- Benefits play a role\n", + "- The data is from 2011 to 2014" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "O3PSYxkHYQ9T" + }, + "source": [ + "## 2. What columns do you think do not add value to our analysis? \n", + "Drop those columns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(148654, 13)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "RO-dE2B9YP-3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Id 148654\n", + "EmployeeName 110811\n", + "JobTitle 2159\n", + "BasePay 109724\n", + "OvertimePay 66162\n", + "OtherPay 84243\n", + "Benefits 98647\n", + "TotalPay 138486\n", + "TotalPayBenefits 142098\n", + "Year 4\n", + "Notes 0\n", + "Agency 1\n", + "Status 2\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Id` is unique for every row, `Notes` is entirely empty, `Agency` has a single constant value, and `Status` is missing for most rows (only 38119 of 148654 are non-null), so it is safe to assume these columns do not add value and that we can drop them" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYear
0NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011
1GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011
2ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.6NaN335279.91335279.912011
3CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.056120.71198306.9NaN332343.61332343.612011
4PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.69737.0182234.59NaN326373.19326373.192011
\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle \\\n", + "0 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "1 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "2 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "3 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "4 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", + "0 167411.18 0.0 400184.25 NaN 567595.43 567595.43 \n", + "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", + "2 212739.13 106088.18 16452.6 NaN 335279.91 335279.91 \n", + "3 77916.0 56120.71 198306.9 NaN 332343.61 332343.61 \n", + "4 134401.6 9737.0 182234.59 NaN 326373.19 326373.19 \n", + "\n", + " Year \n", + "0 2011 \n", + "1 2011 \n", + "2 2011 \n", + "3 2011 \n", + "4 2011 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.drop(['Id', 'Notes', 'Status' , 'Agency'], axis = 1, inplace=True)\n", + "salaries.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "TPYk-QPVYVUo" + }, + "source": [ + "## 3. Check for missing values in the data. \n", + "What is the strategy you will apply to deal with missing values?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Typically we can either impute or remove the null values, lets inspect which columns have null values, however it is often best to **check with the business team**, if you are going to make any assumptions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "7cJw9WgMYcCg" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName 0\n", + "JobTitle 0\n", + "BasePay 605\n", + "OvertimePay 0\n", + "OtherPay 0\n", + "Benefits 36159\n", + "TotalPay 0\n", + "TotalPayBenefits 0\n", + "Year 0\n", + "dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYear
81391Kevin P CashmanDeputy Chief 3NaN0.0149934.110.0149934.11149934.112013
84506Demetrya MullensLicensed Vocational NurseNaN0.0110485.4120779.0110485.41131264.412013
84960Michael M HoranPark Patrol OfficerNaN0.0120000.08841.48120000.00128841.482013
90525Thomas TangPolice Officer 3NaN0.0106079.310.0106079.31106079.312013
90786Michael C HillDeputy SheriffNaN0.081299.0223877.5381299.02105176.552013
..............................
110526Arthur L CurryPS Aide Health ServicesNaN0.010.670.010.6710.672013
110527Nereida VegaSenior ClerkNaN0.05.560.05.565.562013
110528Timothy E GibsonPolice Officer 3NaN0.00.0-2.730.00-2.732013
110529Mark E LahertyPolice Officer 3NaN0.00.0-8.20.00-8.202013
110530David P KuciaPolice Officer 3NaN0.00.0-33.890.00-33.892013
\n", + "

605 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle BasePay OvertimePay \\\n", + "81391 Kevin P Cashman Deputy Chief 3 NaN 0.0 \n", + "84506 Demetrya Mullens Licensed Vocational Nurse NaN 0.0 \n", + "84960 Michael M Horan Park Patrol Officer NaN 0.0 \n", + "90525 Thomas Tang Police Officer 3 NaN 0.0 \n", + "90786 Michael C Hill Deputy Sheriff NaN 0.0 \n", + "... ... ... ... ... \n", + "110526 Arthur L Curry PS Aide Health Services NaN 0.0 \n", + "110527 Nereida Vega Senior Clerk NaN 0.0 \n", + "110528 Timothy E Gibson Police Officer 3 NaN 0.0 \n", + "110529 Mark E Laherty Police Officer 3 NaN 0.0 \n", + "110530 David P Kucia Police Officer 3 NaN 0.0 \n", + "\n", + " OtherPay Benefits TotalPay TotalPayBenefits Year \n", + "81391 149934.11 0.0 149934.11 149934.11 2013 \n", + "84506 110485.41 20779.0 110485.41 131264.41 2013 \n", + "84960 120000.0 8841.48 120000.00 128841.48 2013 \n", + "90525 106079.31 0.0 106079.31 106079.31 2013 \n", + "90786 81299.02 23877.53 81299.02 105176.55 2013 \n", + "... ... ... ... ... ... \n", + "110526 10.67 0.0 10.67 10.67 2013 \n", + "110527 5.56 0.0 5.56 5.56 2013 \n", + "110528 0.0 -2.73 0.00 -2.73 2013 \n", + "110529 0.0 -8.2 0.00 -8.20 2013 \n", + "110530 0.0 -33.89 0.00 -33.89 2013 \n", + "\n", + "[605 rows x 9 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.loc[salaries.BasePay.isnull(), :]\n", + "# salaries[salaries.BasePay.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYear
0NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011
1GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011
2ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.6NaN335279.91335279.912011
3CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.056120.71198306.9NaN332343.61332343.612011
4PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.69737.0182234.59NaN326373.19326373.192011
..............................
36154SERENA HUGHESSWIMMING INSTRUCTOR/POOL LIFEGUARD0.00.04.17NaN4.174.172011
36155JOE BROWN JRTRANSIT OPERATOR0.00.00.3NaN0.300.302011
36156PAULETTE ADAMSSTATIONARY ENGINEER, WATER TREATMENT PLANT0.00.00.0NaN0.000.002011
36157KAUKAB MOHSINTRANSIT OPERATOR0.00.00.0NaN0.000.002011
36158JOSEPHINE MCCREARYMANAGER IV0.00.00.0NaN0.000.002011
\n", + "

36159 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle \\\n", + "0 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "1 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "2 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "3 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "4 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "... ... ... \n", + "36154 SERENA HUGHES SWIMMING INSTRUCTOR/POOL LIFEGUARD \n", + "36155 JOE BROWN JR TRANSIT OPERATOR \n", + "36156 PAULETTE ADAMS STATIONARY ENGINEER, WATER TREATMENT PLANT \n", + "36157 KAUKAB MOHSIN TRANSIT OPERATOR \n", + "36158 JOSEPHINE MCCREARY MANAGER IV \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", + "0 167411.18 0.0 400184.25 NaN 567595.43 567595.43 \n", + "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", + "2 212739.13 106088.18 16452.6 NaN 335279.91 335279.91 \n", + "3 77916.0 56120.71 198306.9 NaN 332343.61 332343.61 \n", + "4 134401.6 9737.0 182234.59 NaN 326373.19 326373.19 \n", + "... ... ... ... ... ... ... \n", + "36154 0.0 0.0 4.17 NaN 4.17 4.17 \n", + "36155 0.0 0.0 0.3 NaN 0.30 0.30 \n", + "36156 0.0 0.0 0.0 NaN 0.00 0.00 \n", + "36157 0.0 0.0 0.0 NaN 0.00 0.00 \n", + "36158 0.0 0.0 0.0 NaN 0.00 0.00 \n", + "\n", + " Year \n", + "0 2011 \n", + "1 2011 \n", + "2 2011 \n", + "3 2011 \n", + "4 2011 \n", + "... ... 
\n", + "36154 2011 \n", + "36155 2011 \n", + "36156 2011 \n", + "36157 2011 \n", + "36158 2011 \n", + "\n", + "[36159 rows x 9 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.loc[salaries.Benefits.isnull(), :]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### It is safe to assume, that not all roles would get benefits, so we can impute null values with `0` " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.Benefits.fillna(0, inplace=True)\n", + "salaries.Benefits.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName 0\n", + "JobTitle 0\n", + "BasePay 605\n", + "OvertimePay 0\n", + "OtherPay 0\n", + "Benefits 0\n", + "TotalPay 0\n", + "TotalPayBenefits 0\n", + "Year 0\n", + "dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Also, since the percentage of records with missing `BasePay` is very low, we can safely remove these from our analysis." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName 0\n", + "JobTitle 0\n", + "BasePay 0\n", + "OvertimePay 0\n", + "OtherPay 0\n", + "Benefits 0\n", + "TotalPay 0\n", + "TotalPayBenefits 0\n", + "Year 0\n", + "dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.dropna(inplace=True)\n", + "salaries.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(148049, 9)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "GEMD4kMSYcs6" + }, + "source": [ + "## 4. Convert the object values of all the pay columns to numeric using the pandas `to_numeric` method. \n", + "Is it straightforward, or is there some discrepancy? If so, how will you overcome it?" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName object\n", + "JobTitle object\n", + "BasePay object\n", + "OvertimePay object\n", + "OtherPay object\n", + "Benefits object\n", + "TotalPay float64\n", + "TotalPayBenefits float64\n", + "Year int64\n", + "dtype: object" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "qJMPW47DY1Qr" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYear
148646Not providedNot providedNot ProvidedNot ProvidedNot ProvidedNot Provided0.00.02014
148650Not providedNot providedNot ProvidedNot ProvidedNot ProvidedNot Provided0.00.02014
148651Not providedNot providedNot ProvidedNot ProvidedNot ProvidedNot Provided0.00.02014
148652Not providedNot providedNot ProvidedNot ProvidedNot ProvidedNot Provided0.00.02014
\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle BasePay OvertimePay OtherPay \\\n", + "148646 Not provided Not provided Not Provided Not Provided Not Provided \n", + "148650 Not provided Not provided Not Provided Not Provided Not Provided \n", + "148651 Not provided Not provided Not Provided Not Provided Not Provided \n", + "148652 Not provided Not provided Not Provided Not Provided Not Provided \n", + "\n", + " Benefits TotalPay TotalPayBenefits Year \n", + "148646 Not Provided 0.0 0.0 2014 \n", + "148650 Not Provided 0.0 0.0 2014 \n", + "148651 Not Provided 0.0 0.0 2014 \n", + "148652 Not Provided 0.0 0.0 2014 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.loc[salaries.Benefits == 'Not Provided', :]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### To convert one column with mixed type, use `pd.to_numeric` \n", + "pass `errors='coerce'` to replace any non-numeric value to `NaN`" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0.0\n", + "1 0.0\n", + "2 0.0\n", + "3 0.0\n", + "4 0.0\n", + " ... 
\n", + "148649 0.0\n", + "148650 NaN\n", + "148651 NaN\n", + "148652 NaN\n", + "148653 0.0\n", + "Name: Benefits, Length: 148049, dtype: float64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_numeric(salaries.Benefits, errors='coerce')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let's convert all the columns" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "for column in ['BasePay','OvertimePay','OtherPay','Benefits','TotalPay','TotalPayBenefits']:\n", + " salaries[column] = pd.to_numeric(salaries[column], errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName object\n", + "JobTitle object\n", + "BasePay float64\n", + "OvertimePay float64\n", + "OtherPay float64\n", + "Benefits float64\n", + "TotalPay float64\n", + "TotalPayBenefits float64\n", + "Year int64\n", + "dtype: object" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "a2rQ7KbEY2CX" + }, + "source": [ + "## 5. Find the 10 job titles with the highest median base pay.\n", + "\n", + "Base Pay -> The most basic sum of money or hourly rate paid to an employee of a business in compensation for their work efforts or time spent on the job." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "salaries['jobtitle'] = salaries.JobTitle.str.lower()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearjobtitle
0NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.250.0567595.43567595.432011general manager-metropolitan transit authority
1GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.380.0538909.28538909.282011captain iii (police department)
2ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.600.0335279.91335279.912011captain iii (police department)
3CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.900.0332343.61332343.612011wire rope cable maintenance mechanic
4PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.590.0326373.19326373.192011deputy chief of department,(fire department)
\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle \\\n", + "0 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "1 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "2 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "3 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "4 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", + "0 167411.18 0.00 400184.25 0.0 567595.43 567595.43 \n", + "1 155966.02 245131.88 137811.38 0.0 538909.28 538909.28 \n", + "2 212739.13 106088.18 16452.60 0.0 335279.91 335279.91 \n", + "3 77916.00 56120.71 198306.90 0.0 332343.61 332343.61 \n", + "4 134401.60 9737.00 182234.59 0.0 326373.19 326373.19 \n", + "\n", + " Year jobtitle \n", + "0 2011 general manager-metropolitan transit authority \n", + "1 2011 captain iii (police department) \n", + "2 2011 captain iii (police department) \n", + "3 2011 wire rope cable maintenance mechanic \n", + "4 2011 deputy chief of department,(fire department) " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Jq-6G__LY6zi" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "jobtitle\n", + "chief of police 305014.02\n", + "chief, fire department 302068.00\n", + "gen mgr, public trnsp dept 294000.18\n", + "chief of department, (fire department) 285262.00\n", + "dep dir for investments, ret 276153.76\n", + "dept head v 270616.27\n", + "adm, sfgh medical center 268946.02\n", + "controller 267914.00\n", + "deputy chief 3 263408.55\n", + "dep chf of dept (fire dept) 260728.00\n", + "Name: BasePay, dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(salaries\n", + " .groupby('jobtitle')['BasePay']\n", 
+ " .median()\n", + " .round(2)\n", + " .nlargest(10)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "wMMqnYoSY77K" + }, + "source": [ + "## 6. Find the 10 job titles with the highest median overtime pay.\n", + "\n", + "\n", + "Overtime Pay -> Additional financial compensation for any hours worked by nonexempt staff beyond forty hours per week." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "NZUvy9HjZAaK" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "salaries.groupby('jobtitle')['OvertimePay'].median().round(2).nlargest(10).plot.barh(title='Top 10 overtime')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "T6muLiSWZHBv" + }, + "source": [ + "## 7. Plot the sectors which have provided the maximum number of employment" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName NATHANIEL FORD\n", + "JobTitle GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY\n", + "BasePay 167411\n", + "OvertimePay 0\n", + "OtherPay 400184\n", + "Benefits 0\n", + "TotalPay 567595\n", + "TotalPayBenefits 567595\n", + "Year 2011\n", + "jobtitle general manager-metropolitan transit authority\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries.loc[0,:]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "tG-rwzIGZ2rw" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "(salaries\n", + " .jobtitle\n", + " .value_counts()\n", + " .nlargest(30)\n", + " .sort_values()\n", + " .plot.barh(figsize=(10,10))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "JKmQo5SLZ-Cq" + }, + "source": [ + "## 8. Plot the top 10 Job titles with highest mean TotalPayBenefits" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "qBBjyNazZ_vb" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "(salaries\n", + " .groupby('jobtitle')['TotalPayBenefits']\n", + " .mean()\n", + " .round(2)\n", + " .nlargest(10)\n", + " .plot.barh().invert_yaxis()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jq03kTXoaF4o" + }, + "source": [ + "## 9. Find the highest paid employees for the year 2012, 2013 and 2014\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearEmployeeNameTotalPay
361592012Gary Altenberg362844.66
361672012John Goldberg350403.41
361632012Frederick Binkley337204.86
361612012Khoa Trinh336393.73
361732012Mark Kearney327689.78
729272013Samson Lai347102.32
729252013Gregory P Suhr339282.07
729262013Joanne M Hayes-White336922.01
729282013Ellen G Moffatt335537.96
729302013David L Franklin333888.32
1105312014David Shinn471952.64
1105322014Amy P Hart390111.98
1105362014Ellen G Moffatt344187.46
1105332014William J Coaker Jr.339653.70
1105392014Samson Lai335484.96
\n", + "
" + ], + "text/plain": [ + " Year EmployeeName TotalPay\n", + "36159 2012 Gary Altenberg 362844.66\n", + "36167 2012 John Goldberg 350403.41\n", + "36163 2012 Frederick Binkley 337204.86\n", + "36161 2012 Khoa Trinh 336393.73\n", + "36173 2012 Mark Kearney 327689.78\n", + "72927 2013 Samson Lai 347102.32\n", + "72925 2013 Gregory P Suhr 339282.07\n", + "72926 2013 Joanne M Hayes-White 336922.01\n", + "72928 2013 Ellen G Moffatt 335537.96\n", + "72930 2013 David L Franklin 333888.32\n", + "110531 2014 David Shinn 471952.64\n", + "110532 2014 Amy P Hart 390111.98\n", + "110536 2014 Ellen G Moffatt 344187.46\n", + "110533 2014 William J Coaker Jr. 339653.70\n", + "110539 2014 Samson Lai 335484.96" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_paid_emp = (salaries\n", + " .loc[salaries.Year.isin([2012,2013,2014]),:] # filter required years \n", + " .groupby(['Year'])['TotalPay']\n", + " .nlargest() # by default top 5\n", + " .index\n", + " .get_level_values(-1) # index values of last level, in this case row index of highest salaries individuals\n", + " )\n", + "salaries.loc[high_paid_emp, ['Year','EmployeeName','TotalPay']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "UCenIjdfaQGB" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "SF_salary_data_analysis.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Vehicle_insurane/notebook/Vehicle_insurance_classification-MK.ipynb 
b/Vehicle_insurane/notebook/Vehicle_insurance_classification-MK.ipynb new file mode 100644 index 0000000..9fa75e8 --- /dev/null +++ b/Vehicle_insurane/notebook/Vehicle_insurance_classification-MK.ipynb @@ -0,0 +1,1502 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Vehicle Insurance Interest Response Classification\n", + "\n", + "Our client is an insurance company that has provided Health Insurance to its customers. Now they need your help in building a model to predict whether the policyholders (customers) from the past year will also be interested in Vehicle Insurance provided by the company.\n", + "\n", + "Just like medical insurance, there is vehicle insurance, where every year the customer needs to pay a premium of a certain amount to the insurance provider company so that, in case of an unfortunate accident involving the vehicle, the insurance provider company will provide compensation (called ‘sum assured’) to the customer.\n", + "\n", + "Building a model to predict whether a customer would be interested in Vehicle Insurance is extremely helpful for the company because it can then accordingly plan its communication strategy to reach out to those customers and optimise its business model and revenue.\n", + "\n", + "## Problem Statement \n", + "\n", + "**An insurance company has provided Health Insurance to its customers. Now they want a model to predict whether the policyholders (customers) from the past year will also be interested in Vehicle Insurance provided by the company.**\n", + "\n", + "## Data\n", + "\n", + "|Variable|Definition|\n", + "|-----|-----|\n", + "|id\t|Unique ID for the customer|\n", + "|Gender\t|Gender of the customer|\n", + "|Age\t|Age of the customer|\n", + "|Driving_License\t|0 : Customer does not have DL, 1 : Customer already has DL|\n", + "|Region_Code\t|Unique code for the region of the customer|\n", + "|Previously_Insured\t|1 : Customer already has Vehicle Insurance, 0 : Customer doesn't have Vehicle Insurance|\n", +
"|Vehicle_Age\t|Age of the Vehicle|\n", + "|Vehicle_Damage\t|1 : Customer got his/her vehicle damaged in the past. 0 : Customer didn't get his/her vehicle damaged in the past.|\n", + "|Annual_Premium\t|The amount the customer needs to pay as premium in the year|\n", + "|Policy_Sales_Channel\t|Anonymized code for the channel of outreach to the customer, i.e. Different Agents, Over Mail, Over Phone, In Person, etc.|\n", + "|Vintage\t|Number of days the customer has been associated with the company|\n", + "|Response\t|1 : Customer is interested, 0 : Customer is not interested|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data Source: [Kaggle](https://www.kaggle.com/anmolkumar/health-insurance-cross-sell-prediction) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Resources:\n", + "- [Handle imbalanced classes](https://elitedatascience.com/imbalanced-classes)\n", + "- [One-Hot Encoding Pandas](https://pandas.pydata.org/docs/reference/api/pandas.get_dummies.html)\n", + " - [drop_first=True discussion#1](https://datascience.stackexchange.com/questions/28353/always-drop-the-first-column-after-performing-one-hot-encoding)\n", + " - [drop_first=True discussion#2](https://www.kaggle.com/c/instant-gratification/discussion/92817)\n", + "- [Seaborn percentage in countplot](https://github.com/mwaskom/seaborn/issues/1027)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score,roc_auc_score,confusion_matrix,classification_report\n", + "from sklearn.utils import resample,shuffle" + ] + }, + { +
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv('../data/data.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(381109, 12)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 381109 entries, 0 to 381108\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 381109 non-null int64 \n", + " 1 Gender 381109 non-null object \n", + " 2 Age 381109 non-null int64 \n", + " 3 Driving_License 381109 non-null int64 \n", + " 4 Region_Code 381109 non-null float64\n", + " 5 Previously_Insured 381109 non-null int64 \n", + " 6 Vehicle_Age 381109 non-null object \n", + " 7 Vehicle_Damage 381109 non-null object \n", + " 8 Annual_Premium 381109 non-null float64\n", + " 9 Policy_Sales_Channel 381109 non-null float64\n", + " 10 Vintage 381109 non-null int64 \n", + " 11 Response 381109 non-null int64 \n", + "dtypes: float64(3), int64(6), object(3)\n", + "memory usage: 34.9+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "Seems like data is clean, without any missing values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary of the data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idAgeDriving_LicenseRegion_CodePreviously_InsuredAnnual_PremiumPolicy_Sales_ChannelVintageResponse
count381109.000000381109.000000381109.000000381109.000000381109.000000381109.000000381109.000000381109.000000381109.000000
mean190555.00000038.8225840.99786926.3888070.45821030564.389581112.034295154.3473970.122563
std110016.83620815.5116110.04611013.2298880.49825117213.15505754.20399583.6713040.327936
min1.00000020.0000000.0000000.0000000.0000002630.0000001.00000010.0000000.000000
25%95278.00000025.0000001.00000015.0000000.00000024405.00000029.00000082.0000000.000000
50%190555.00000036.0000001.00000028.0000000.00000031669.000000133.000000154.0000000.000000
75%285832.00000049.0000001.00000035.0000001.00000039400.000000152.000000227.0000000.000000
max381109.00000085.0000001.00000052.0000001.000000540165.000000163.000000299.0000001.000000
\n", + "
" + ], + "text/plain": [ + " id Age Driving_License Region_Code \\\n", + "count 381109.000000 381109.000000 381109.000000 381109.000000 \n", + "mean 190555.000000 38.822584 0.997869 26.388807 \n", + "std 110016.836208 15.511611 0.046110 13.229888 \n", + "min 1.000000 20.000000 0.000000 0.000000 \n", + "25% 95278.000000 25.000000 1.000000 15.000000 \n", + "50% 190555.000000 36.000000 1.000000 28.000000 \n", + "75% 285832.000000 49.000000 1.000000 35.000000 \n", + "max 381109.000000 85.000000 1.000000 52.000000 \n", + "\n", + " Previously_Insured Annual_Premium Policy_Sales_Channel \\\n", + "count 381109.000000 381109.000000 381109.000000 \n", + "mean 0.458210 30564.389581 112.034295 \n", + "std 0.498251 17213.155057 54.203995 \n", + "min 0.000000 2630.000000 1.000000 \n", + "25% 0.000000 24405.000000 29.000000 \n", + "50% 0.000000 31669.000000 133.000000 \n", + "75% 1.000000 39400.000000 152.000000 \n", + "max 1.000000 540165.000000 163.000000 \n", + "\n", + " Vintage Response \n", + "count 381109.000000 381109.000000 \n", + "mean 154.347397 0.122563 \n", + "std 83.671304 0.327936 \n", + "min 10.000000 0.000000 \n", + "25% 82.000000 0.000000 \n", + "50% 154.000000 0.000000 \n", + "75% 227.000000 0.000000 \n", + "max 299.000000 1.000000 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "- average customer age is ~38 years old\n", + "- almost all have driving license\n", + "- around half have previous vehicle insurance cover\n", + "- looks like a class imbalance data set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get a count of the target variable and note down your observations" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=data.Response)\n", + "plt.title('Vehicle insurance survey response distribution')\n", + "plt.xticks(ticks=[0,1],labels=['Not interested','Interested'])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "- class imbalance" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What is the ratio of male and female in our dataset?" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=data.Gender)\n", + "plt.title('Gender Distribution in data')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight\n", + "- fairly equal distribution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Check the gender ratio in the interested customers, what are your observations?" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZYAAAEGCAYAAABGnrPVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAPLUlEQVR4nO3df/BldV3H8eerXRZWNEZYyA2BXWxLWSJgkRFpGNC0pEmkmEJt0GyCSZSw0QalGAayRqGaqJkUJiYgAydSfkioyM9RjGVXF3YRNn5EubojIKUEtsTy7o97Vi9fvj8u8bnf+73f7/Mxc+ee8znn3Pt+z9nd155zzz03VYUkSa382KgLkCTNLwaLJKkpg0WS1JTBIklqymCRJDW1eNQFjNqyZctqxYoVoy5DksbK+vXrH6uqPSdbtuCDZcWKFaxbt27UZUjSWEny71Mt81SYJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKmpBf/N+3u3fJc1H7p01GVI0qxaf95JQ3ttj1gkSU0ZLJKkpgwWSVJTBoskqSmDRZLUlMEiSWrKYJEkNWWwSJKaMlgkSU0ZLJKkpgwWSVJTBoskqSmDRZLUlMEiSWrKYJEkNWWwSJKaMlgkSU0ZLJKkpgwWSVJTBoskqSmDRZLUlMEiSWrKYJEkNWWwSJKaMlgkSU0ZLJKkpgwWSVJTBoskqSmDRZLUlMEiSWrKYJEkNWWwSJKaMlgkSU0ZLJKkpgwWSVJTBoskqSmDRZLUlMEiSWrKYJEkNWWwSJKamlPBkqSSXNY3vzjJo0k+N8N2R8+0jiRpdsypYAGeBA5MsrSbfxPwrRHWI0l6geZasABcD/xyN/124PIdC5IcnuT2JF/vnn9m4sZJdk1ycZI7u/WOm6W6JUnMzWC5AjgxyS7AQcAdfcvuA46qqkOAs4A/mWT7M4Gbquq1wDHAeUl27V8hyclJ1iVZ98xTTwylCUlaqBaPuoCJquruJCvoHa3884TFuwGXJFkFFLDTJC/xZuCtST7Yze8C7Avc2/ceFwIXAuz6ipXVsn5JWujmXLB0rgHOB44G9ugbPxe4uaqO78Lnlkm2DfBrVbV5uCVKkiYzF0+FAVwMnFNVGyeM78aPPsx/9xTbfgF4f5IAJDlkKBVKkiY1J4OlqrZU1V9OsujjwJ8m+QqwaIrNz6V3iuzuJJu6eUnSLJlTp8Kq6qWTj
N1Cd8qrqr4K/HTf4j+aZJ0fAKcMtVBJ0pTm5BGLJGl8GSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmpoxWJIsSvKl2ShGkjT+ZgyWqtoOPJVkt1moR5I05hYPuN7/ABuT3AA8uWOwqk4bSlWSpLE1aLBc1z0kSZrWQMFSVZckWQrsW1Wbh1yTJGmMDRQsSX4FOB9YAqxMcjBwTlW9dZjFzYbXvHIP1p130qjLkKR5Y9DLjc8GDgf+C6CqNgArh1STJGmMDRosz1TV9yaMVetiJEnjb9AP7zcleQewKMkq4DTg9uGVJUkaV4MesbwfWA1sAy4Hvg+cPqyiJEnja9Crwp4CzuwekiRNadpgSXIt03yWMh+uCpMktTXTEcv53fOvAq8A/r6bfzvw8JBqkiSNsWmDpapuBUhyblUd1bfo2iS3DbUySdJYGvTD+z2T7L9jJslKYM/hlCRJGmeDXm78AeCWJA918yuAU4ZSkSRprA16Vdjnu++vvLobuq+qtg2vLEnSuBr0iAVgDb0jlcXAzyWhqi4dSlWSpLE16E0oLwNeBWwAtnfDBRgskqTnGPSI5TDggKry/mCSpGkNelXYJnrfY5EkaVqDHrEsA76RZC29+4UBfvNekvR8gwbL2cMsQpI0fwx6ufGtSfYDVlXVl5K8BFg03NIkSeNooM9YkvwOcCXwyW5ob+CqYRUlSRpfg354fypwJL3fYaGq7gf2GlZRkqTxNWiwbKuqp3fMJFmMP00sSZrEoB/e35rkI8DSJG8C3gtcO7yyZs/TW+/hP8752VGXIY2Ffc/aOOoSNAYGPWI5A3gU2AicDFxXVf6apCTpeaYNliTHJTm1qp6tqouA/eh9C/8jSU6YlQolSWNlpiOWPwCu6ZtfQu9mlEcDvzukmiRJY2ymz1iWVNU3++a/XFWPA48n2XWIdUmSxtRMRywv75+pqvf1zfoLkpKk55kpWO7ovhz5HElOAdYOpyRJ0jib6VTYB4CrkrwD+Fo3tgbYGXjbMAuTJI2naYOlqh4BXp/kDcDqbvi6qrpp6JVJksbSoDehvAkwTCRJMxr0C5KSJA3EYJEkNWWwSJKaMlgkSU0ZLJKkpgwWSVJTBoskqSmDRZLUlMEiSWrKYJEkNWWwSJKaMlgkSU0ZLJKkpgwWSVJTBoskqSmDRZLUlMEiSWrKYJEkNWWwSJKaMlgkSU0ZLJKkpgwWSVJTBoskqSmDRZLUlMEiSWrKYJEkNWWwSJKaMlgkSU0ZLJKkpgwWSVJTQwuWJNuTbOh7rBjiez2cZNmwXl+SNLjFQ3ztH1TVwUN8fUnSHDSrp8KSrElya5L1Sb6QZHk3fkuSv0hyW5J7k7w2yWeS3J/kj/u2v6rb9p4kJ0/xHr+ZZG13lPTJJItmqz9J0nCDZWnfabDPJtkJ+CvghKpaA1wMfLRv/aer6ijgE8DVwKnAgcC7k+zRrfOebtvDgNP6xgFI8hrgN4Aju6Ol7cA7JxaW5OQk65Kse/zJ7U2blqSFbtZOhSU5kF5Q3JAEYBGwtW/9a7rnjcA9VbW12+4hYB/gu/TC5PhuvX2AVd34Dm8E1gB3du+xFHhkYmFVdSFwIcBBey+tF9WlJOk5hhksE4VeYBwxxfJt3fOzfdM75hcnORr4BeCIqnoqyS3ALpO8xyVV9eFmVUuSXpDZ/IxlM7BnkiMAkuyUZPUL2H434D+7UHk18LpJ1rkROCHJXt177J5kvxdbuCRpcLMWLFX1NHAC8LEkdwEbgNe/gJf4PL0jl7uBc4F/meQ9vgH8IfDFbr0bgOUvtnZJ0uBStbA/Yjho76X1uVN+atRlSGNh37M2jroEz
RFJ1lfVYZMt85v3kqSmDBZJUlMGiySpKYNFktSUwSJJaspgkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJaspgkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJaspgkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJaspgkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJaspgkSQ1tXjUBYzakuWr2fesdaMuQ5LmDY9YJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKZSVaOuYaSSPAFsHnUdQ7QMeGzURQyR/Y03+xtf+1XVnpMtWPD3CgM2V9Vhoy5iWJKss7/xZX/jbb73NxVPhUmSmjJYJElNGSxw4agLGDL7G2/2N97me3+TWvAf3kuS2vKIRZLUlMEiSWpqQQdLkl9KsjnJA0nOGHU9g0rycJKNSTYkWdeN7Z7khiT3d88v71v/w12Pm5P8Yt/4mu51HkhyQZKMqJ+LkzySZFPfWLN+kuyc5NPd+B1JVsyB/s5O8q1uH25IcuwY97dPkpuT3JvkniS/143Pi304TX/zZh82V1UL8gEsAh4E9geWAHcBB4y6rgFrfxhYNmHs48AZ3fQZwMe66QO63nYGVnY9L+qWrQWOAAJcD7xlRP0cBRwKbBpGP8B7gU900ycCn54D/Z0NfHCSdcexv+XAod30y4B/7fqYF/twmv7mzT5s/VjIRyyHAw9U1UNV9TRwBXDciGt6MY4DLummLwHe1jd+RVVtq6p/Ax4ADk+yHPjxqvpq9f40X9q3zayqqtuAxycMt+yn/7WuBN44m0dnU/Q3lXHsb2tVfa2bfgK4F9ibebIPp+lvKmPV3zAs5GDZG/hm3/wWpv/DMpcU8MUk65Oc3I39RFVthd5fBGCvbnyqPvfupieOzxUt+/nhNlX1DPA9YI+hVT649yW5uztVtuM00Vj3153COQS4g3m4Dyf0B/NwH7awkINlsv8NjMu110dW1aHAW4BTkxw1zbpT9Tmu/f9/+pmLvf4N8CrgYGAr8Gfd+Nj2l+SlwD8Bp1fV96dbdZKxOd/jJP3Nu33YykIOli3APn3zrwS+PaJaXpCq+nb3/AjwWXqn9b7THWrTPT/SrT5Vn1u66Ynjc0XLfn64TZLFwG4MfmpqKKrqO1W1vaqeBS6itw9hTPtLshO9f3Q/VVWf6YbnzT6crL/5tg9bWsjBciewKsnKJEvofWB2zYhrmlGSXZO8bMc08GZgE73a39Wt9i7g6m76GuDE7qqTlcAqYG13auKJJK/rzuWe1LfNXNCyn/7XOgG4qTvHPTI7/sHtHE9vH8IY9tfV87fAvVX1532L5sU+nKq/+bQPmxv11QOjfADH0rvC40HgzFHXM2DN+9O74uQu4J4dddM7H3sjcH/3vHvfNmd2PW6m78ov4DB6fxkeBP6a7k4MI+jpcnqnEv6X3v/cfrtlP8AuwD/S+xB1LbD/HOjvMmAjcDe9f1SWj3F/P0/vtM3dwIbucex82YfT9Ddv9mHrh7d0kSQ1tZBPhUmShsBgkSQ1ZbBIkpoyWCRJTRkskqSmDBZpHkhyepKXjLoOCfwFSWleSPIwcFhVPTbqWiSPWKRZkuSk7oaFdyW5LMl+SW7sxm5Msm+33t8lOaFvu//uno9OckuSK5Pcl+RT6TkN+Eng5iQ3j6Y76UcWj7oAaSFIspret7GPrKrHkuxO7zbpl1bVJUneA1zAzD9dcAiwmt49pr7Svd4FSX4fOMYjFs0FHrFIs+MNwJU7/uGvqsfp/eDTP3TLL6N365CZrK2qLdW78eEGYMUQapVeFINFmh1h5tug71j+DN3fze5mhUv61tnWN70dzzpoDjJYpNlxI/DrSfaA3u/BA7fTu6s2wDuBL3fTDwNruunjgJ0GeP0n6P1srjRy/m9HmgVVd
U+SjwK3JtkOfB04Dbg4yYeAR4Hf6la/CLg6yVp6gfTkAG9xIXB9kq1VdUz7DqTBebmxJKkpT4VJkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJaur/AP97ZkXQY9j6AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "filter_df = data.Gender.loc[data.Response==1]\n", + "sns.countplot(y=filter_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZYAAAEGCAYAAABGnrPVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQnklEQVR4nO3de5BkZXnH8e8vuyALIiqsitwWEkSBIMglIimyolLGGNFIghhLjZZQSkBNiCWSEAPBVIIVb2UZiCHiJRjLKCJGVuRmxOheFFnWZUMKiWzcCIgXAgQKfPJHnymbdXa3B96enrPz/VRNdZ+3z+l+ntmZ+e05p/s9qSokSWrllyZdgCRp62KwSJKaMlgkSU0ZLJKkpgwWSVJTCyddwKTtsssutWTJkkmXIUm9smrVqjuravF0j837YFmyZAkrV66cdBmS1CtJ/mtTj3koTJLUlMEiSWrKYJEkNWWwSJKaMlgkSU0ZLJKkpgwWSVJTBoskqSmDRZLU1Lz/5P3a9T/k0D/56KTLkKRZteq8V4/tud1jkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJaspgkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJaspgkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJaspgkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJaspgkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpKYNFktSUwSJJampOBUuSSvKxoeWFSe5IctkWtlu6pXUkSbNjTgULcA9wYJJF3fILgP+eYD2SpBmaa8EC8EXgt7r7JwIXTz2Q5IgkX0vyre52v403TrJDkguTrOjWO26W6pYkMTeD5ZPAK5JsBxwEfGPosZuAo6vqEOAs4F3TbH8mcFVVHQ48FzgvyQ7DKyQ5KcnKJCsfvPfusTQhSfPVwkkXsLGquiHJEgZ7K/+60cM7ARcl2RcoYJtpnuJY4CVJTu+WtwP2BNYOvcYFwAUAOzxl72pZvyTNd3MuWDqXAu8GlgI7D42fA1xdVS/rwueaabYN8PKqWjfeEiVJ05mLh8IALgTOrqrVG43vxM9P5r92E9suA05NEoAkh4ylQknStOZksFTV+qp63zQP/Q3wV0muAxZsYvNzGBwiuyHJjd2yJGmWzKlDYVX12GnGrqE75FVV/w48bejhP5tmnfuAk8daqCRpk+bkHoskqb8MFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNbTFYkixI8uXZKEaS1H9bDJaqegi4N8lOs1CPJKnnFo643v8Bq5NcAdwzNVhVp42lKklSb40aLF/oviRJ2qyRgqWqLkqyCNizqtaNuSZJUo+NFCxJfht4N7AtsHeSg4Gzq+ol4yxuNjxj951Zed6rJ12GJG01Rn278TuBI4AfA1TV9cDeY6pJkt
RjowbLg1X1k43GqnUxkqT+G/Xk/Y1JXgksSLIvcBrwtfGVJUnqq1H3WE4FDgDuBy4Gfgq8ZVxFSZL6a9R3hd0LnNl9SZK0SZsNliSfZzPnUraGd4VJktra0h7Lu7vb3wGeAny8Wz4RuHVMNUmSemyzwVJV1wIkOaeqjh566PNJvjLWyiRJvTTqyfvFSfaZWkiyN7B4PCVJkvps1LcbvxW4Jskt3fIS4OSxVCRJ6rVR3xV2eff5lad3QzdV1f3jK0uS1Fej7rEAHMpgT2Uh8MwkVNVHx1KVJKm3Rp2E8mPALwPXAw91wwUYLJKkhxl1j+UwYP+qcn4wSdJmjfqusBsZfI5FkqTNGnWPZRfgO0mWM5gvDPCT95KkXzRqsLxznEVIkrYeo77d+NokewH7VtWXk2wPLBhvaZKkPhrpHEuSNwCfBs7vhnYDLhlXUZKk/hr15P0pwFEMrsNCVd0MPGlcRUmS+mvUYLm/qh6YWkiyEC9NLEmaxqgn769N8g5gUZIXAG8CPj++smbPAxvW8L2zf3XSZUi9sOdZqyddgnpg1D2WtwN3AKuBk4AvVJVXk5Qk/YLNBkuS45KcUlU/q6q/B/Zi8Cn8dyQ5flYqlCT1ypb2WN4GXDq0vC2DySiXAm8cU02SpB7b0jmWbavqtqHlr1bVXcBdSXYYY12SpJ7a0h7LE4YXquoPhxa9gqQk6RdsKVi+0X048mGSnAwsH09JkqQ+29KhsLcClyR5JfDNbuxQ4DHAS8dZmCSpnzYbLFV1O/CcJMcAB3TDX6iqq8ZemSSpl0adhPIqwDCRJG3RqB+QlCRpJAaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmhpbsCR5KMn1Q19LxvhatybZZVzPL0ka3cIxPvd9VXXwGJ9fkjQHzeqhsCSHJrk2yaoky5Ls2o1fk+Q9Sb6SZG2Sw5N8JsnNSf5yaPtLum3XJDlpE6/xqiTLu72k85MsmK3+JEnjDZZFQ4fBPptkG+ADwPFVdShwIXDu0PoPVNXRwN8BnwNOAQ4EXptk526d13XbHgacNjQOQJJnACcAR3V7Sw8Bv79xYUlOSrIyycq77nmoadOSNN/N2qGwJAcyCIorkgAsADYMrX9pd7saWFNVG7rtbgH2AH7IIExe1q23B7BvNz7lecChwIruNRYBt29cWFVdAFwAcNBui+pRdSlJephxBsvGwiAwjtzE4/d3tz8buj+1vDDJUuD5wJFVdW+Sa4DtpnmNi6rqjGZVS5JmZDbPsawDFic5EiDJNkkOmMH2OwE/6kLl6cCzp1nnSuD4JE/qXuOJSfZ6tIVLkkY3a8FSVQ8AxwN/neTbwPXAc2bwFJcz2HO5ATgH+Po0r/Ed4E+BL3XrXQHs+mhrlySNLlXz+xTDQbstqstO/pVJlyH1wp5nrZ50CZojkqyqqsOme8xP3kuSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1JTBIklqymCRJDVlsEiSmjJYJElNGSySpKYMFklSUwaLJKkpg0WS1NTCSRcwadvuegB7nrVy0mVI0lbDPRZJUlMGiySpKYNFktSUwSJJaspgkSQ1ZbBIkpoyWCRJTRkskqSmDBZJUlMGiySpqVTVpGuYqCR3A+smXUdjuwB3TrqIhuxnbrOfuW8cPe1VVYune2DezxUGrKuqwyZdREtJVm5NPd
nP3GY/c99s9+ShMElSUwaLJKkpgwUumHQBY7C19WQ/c5v9zH2z2tO8P3kvSWrLPRZJUlMGiySpqXkdLElemGRdkv9M8vZJ1zNTSS5McnuSG4fGnpjkiiQ3d7dPmGSNM5FkjyRXJ1mbZE2SN3fjvewpyXZJlif5dtfPX3TjvexnSpIFSb6V5LJuue/93JpkdZLrk6zsxnrbU5LHJ/l0kpu636UjZ7ufeRssSRYAHwR+E9gfODHJ/pOtasY+Arxwo7G3A1dW1b7Ald1yXzwI/HFVPQN4NnBK92/S157uB46pqmcCBwMvTPJs+tvPlDcDa4eW+94PwHOr6uChz3r0uaf3AZdX1dOBZzL4t5rdfqpqXn4BRwLLhpbPAM6YdF2PoI8lwI1Dy+uAXbv7uzL4AOjE63yEvX0OeMHW0BOwPfBN4Nf63A+we/eH6Rjgsm6st/10Nd8K7LLRWC97Ah4HfJfujVmT6mfe7rEAuwG3DS2v78b67slVtQGgu33ShOt5RJIsAQ4BvkGPe+oOG10P3A5cUVW97gd4L/A24GdDY33uB6CALyVZleSkbqyvPe0D3AH8Y3e48sNJdmCW+5nPwZJpxnzv9RyQ5LHAvwBvqaqfTrqeR6OqHqqqgxn8T/+IJAdOuqZHKsmLgduratWka2nsqKp6FoPD4qckOXrSBT0KC4FnAR+qqkOAe5jAYbz5HCzrgT2GlncHvj+hWlr6QZJdAbrb2ydcz4wk2YZBqHyiqj7TDfe6J4Cq+jFwDYNzYn3t5yjgJUluBT4JHJPk4/S3HwCq6vvd7e3AZ4Ej6G9P64H13Z4xwKcZBM2s9jOfg2UFsG+SvZNsC7wCuHTCNbVwKfCa7v5rGJyn6IUkAf4BWFtVfzv0UC97SrI4yeO7+4uA5wM30dN+quqMqtq9qpYw+H25qqpeRU/7AUiyQ5Idp+4DxwI30tOequp/gNuS7NcNPQ/4DrPcz7z+5H2SFzE4ZrwAuLCqzp1wSTOS5GJgKYMpsX8A/DlwCfApYE/ge8DvVtVdk6pxJpL8OvBvwGp+fgz/HQzOs/SupyQHARcx+Pn6JeBTVXV2kp3pYT/DkiwFTq+qF/e5nyT7MNhLgcFhpH+qqnN73tPBwIeBbYFbgD+g+/ljlvqZ18EiSWpvPh8KkySNgcEiSWrKYJEkNWWwSJKaMlgkSU0ZLFIPJXlLku0nXYc0Hd9uLM2SJAur6sFGz3UrcFhV3dni+aSW3GORZiDJku46FxcluaG77sX2SQ5Ncm03keGyoekzrknyriTXAm9OcniSr3XXaFmeZMduosrzkqzonvPkbtul3fZT19b4RAZOA54KXJ3k6m7dDyVZmaHrvnTjL+q2/WqS9w9dQ2WHDK7ns6KbrPC4Wf9maqu1cNIFSD20H/D6qrouyYXAKcDLgOOq6o4kJwDnAq/r1n98Vf1GN3XQTcAJVbUiyeOA+4DXAz+pqsOTPAa4LsmXum0PAQ5gMI/ddQwmTHx/kj9icA2RqT2WM6vqru46Q1d2n/r/D+B84Oiq+m43U8OUMxlMyfK6btqZ5Um+XFX3jOH7pXnGYJFm7raquq67/3EG084cCFwxmO6MBcCGofX/ubvdD9hQVSsApmZuTnIscFCS47v1dgL2BR4AllfV+m696xlcf+er09T0e92U7wsZXG9jfwZHJG6pqu9261wMTE0LfyyDCSVP75a3YzDdx/AFvKRHxGCRZm7jE5N3A2uq6shNrD+1F5Bptp0aP7Wqlj1scDAf1/1DQw8xze9skr2B04HDq+pHST7CICimuzTE8Gu+vKrWbWYd6RHxHIs0c3smmQqRE4GvA4unxpJsk+SAaba7CXhqksO79XZMshBYBryxu2QASZ7WzbS7OXcDO3b3H8cgvH6S5MkMrisy9Xr7ZHDRNIAThrZfBpzazShNkkO23LY0GvdYpJlbC7wmyfnAzcAHGPyhfn+SnRj8Xr0XWDO8UVU90J1/+UA3jf59DKbS/zCDQ1zf7P
7Q3wG8dAs1XAB8McmGqnpukm91r3cLg3MxVNV9Sd4EXJ7kTmD50PbndDXe0L3mrcCLH8k3Q9qYbzeWZqD73/9lVdWLK0EmeWxV/W8XHh8Ebq6q90y6Lm3dPBQmbd3e0J30X8PgTQHnT7gezQPusUiSmnKPRZLUlMEiSWrKYJEkNWWwSJKaMlgkSU39P04nYgXwcqj+AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# use of estimator\n", + "sns.barplot(y=filter_df,\n", + " x=filter_df.index,\n", + " estimator=lambda grp: len(grp) / float(len(filter_df)) * 100)\n", + "plt.xlabel('percentage')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "- Males are more likely to be interested in Vehicle Insurance than Females" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Find out the distribution of customers age" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.kdeplot(x=\"Age\",\n", + " hue='Response',\n", + " data=data,\n", + " shade=True)\n", + "plt.title('Age distribution according to the Response')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "if you are facing issues with kdeplot ensure you have latest version of seaborn library installed `pip install -U seaborn`" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seaborn==0.11.0\n" + ] + } + ], + "source": [ + "!pip freeze | grep seaborn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "- young people below 30 years are not interested, second group 40-50 years are not interested \n", + "- middle aged people 30-60 years are interested\n", + "\n", + "Inshort, age plays a role in determining interest, below 30 is a strong no, but between 40-50 there may be a yes or a no." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Which regions have people applied from more?" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "data.Region_Code.value_counts(ascending=True).plot.barh(figsize=(12,10))\n", + "plt.title('Customer base by region')\n", + "plt.xlabel('count')\n", + "plt.ylabel('Region code')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "- Region code 28 is the region with high number of existing customers\n", + "- its a diminishing curve" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAEWCAYAAABsY4yMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAWtUlEQVR4nO3debhlVX3m8e9rFbNQgDgwSQlOjcGgogajiXFE1EBr0oIjihrbWaM2qJ2ITSuatKGNsdFocEJxHoJzq4hGWigUFUHCVEgBDgwyiNJQ/es/9io4dbm36sC9h3Nr1ffzPOepPe/fXvfWW+usvc+pVBWSpP7cYdoFSJImw4CXpE4Z8JLUKQNekjplwEtSpwx4SeqUAb8BSPKmJB+5Hc93bZLdb+O+K5M85jbue2KS59+WfSchg2OTXJnklGnXM1OSZyT52rTrmEuSRyZZNe06NmYG/CKR5OlJVrRwvTTJl5M8fBq1VNUdq+r8aZx7kXk48Fhgl6p6yLSLmamqjquqx027jklI8tokZyS5JskFSV47Y/3eSb6T5Kokq5L8zbRqXcwM+EUgyauBo4G3AHcF7g68GzhgmnWJ3YCVVfXbW7tjkqULsc1GLMCzge2A/YCXJjloZP1HgZOA7YE/Bf5zkj+/3atc5Az4KUuyDHgz8JKq+kxV/baqbqiqf62q186xzyeT/KL1Xk5Kcr+RdfsnObP1fC5O8pq2fIckJyT5TZIrWu9n1p9/kkpyzzb9gSTvbu8ork3yb0nuluToNnTxsyQPmHGIB7carmxDHJu3Y23Xavh1W3dCkl3mqGGPJN9McnmSy5Icl2TbkfUrk7wmyY9bO3x8zXna+gOSnJ7k6iTnJdlvTXsneX97l3RxkiOTLJnl/IcC7wP2bdd9RFv+giTntjb8QpKdZrTbS5KcA5wzyzGXt20OTfJz4Jtt+fOSnNXa5KtJdhvZ53FJzm7X+O4k314zjJXkkCTfHdn2YUlObduemuRhI+tOTPLf2s/vmiRfS7LDbG2/nvbbqV33Fa0dXjCyzxbt9+XKJGcCD55xzJ2SfLr9/C9I8vK5zl9Vb6+qH1TVjVV1NvB54I9HNlkOHFdVq6vqPOC7wP1mOdTGrap8TfHF0Du5EVi6jm3eBHxkZP55wNbAZgw9/9NH1l0KPKJNbwc8sE2/FTgG2KS9HgFkjvMVcM82/QHgMuBBwOYMoXQBQ+9qCXAk8K2RfVcCZwC7MvSu/g04sq27E/BUYMtW/yeBz43seyLw/DZ9T4bhkc2AOzP01o6ecZ5TgJ3aec4CXtTWPQS4qu1/B2Bn4L5t3eeA9wBbAXdpx/irOdrhEOC7I/OP
am3xwFbXPwInzWi3r7d6tpjleMvbNh9q598COBA4F/gPwFLgjcD32vY7AFcDT2nrXgHcMNJGN9XXznkl8Ky27cFt/k4jbXsecO923hOBo+a47nW137cZ3l1uDuwN/Bp4dFt3FPCdVsuu7fdgVVt3B+A04G+ATYHdgfOBx4/xdyTAD9f8fNuyt7TzbQLcB1gFPHjaf58X22vqBWzsL+AZwC/Ws82bGAn4Geu2baGxrM3/HPgrYJsZ272ZoRd0zzFqmhnw/zyy7mXAWSPzewG/GZlfOeMv4v7AeXOcZ2/gypH5E9eE1yzbHgj8cMZ5njky/3bgmDb9HuAfZjnGXYHrGQnfFoTfmuOcNwVom38/8PaR+TsyBO7ykXZ71DradXnbZveRZV8GDh2ZvwNwHcPw0LOBk0fWBbiI2QP+WcApM853MnDISNu+cWTdi4GvzFHnXO23K7Aa2Hpk2VuBD7Tp84H9Rta9kJsD/qHAz2cc73Dg2DF+H48AfgRsNrLsYQz/MN7Y2vSIW/t3b2N4OUQzfZcDO4w7HptkSZKj2tvmqxmCDobeHgw95P2BC9vb+X3b8r9j+AvxtSTnJznsVtT4y5Hp380yf8cZ2180Mn0hQy+bJFsmeU+SC1vtJwHbzjFEcpckx7dhlKuBj4xc4xq/GJm+bqSOXRl6qzPtxtDjuzTDUNVvGMLsLrNsO5ud2vUAUFXXMvz8dh7Z5qKZO81idJvdgP85Us8VDEG+czvfTdvWkGxzPZWyVm3NhTNqm6u9Zpqr/XYCrqiqa+Y4x1r1zqhnN2CnNdfZrvX1DP/ozinJSxn+oXtiVV3flm0PfIWh07J5q/fxSV68rmNtjAz46TsZ+D1DD3UcT2e4+foYYBlDrxCGUKCqTq2qAxhC63PAJ9rya6rqr6tqd+DJwKuTPHqhLmKGXUem7w5c0qb/muHt9EOrahvgT0Zrn+GtDD2z+7dtnznHdrO5CNhjjuXXAztU1bbttU1VjTt2ewlDUA1FJ1sxDDtdPLLNOF/POrrNRQxDRNuOvLaoqu8xDLfddI8iSUbn11Vbc/cZtY1rrva7BNg+ydZznONSbvmzHz3mBTOuc+uq2n+uIpI8DziMYQho9B+23YHVVfWhGsboVwHHM3RsNMKAn7KquophXPKfkhzYermbJHlCkrfPssvWDCF1OcNY9lvWrEiyaYZno5dV1Q0M47er27onJblnC4k1y1dP6LJekmSX1tN6PfDxkdp/B/ymrfvbdRxja+Datu3OwKw3nOfwfuC5SR6d5A5Jdk5y36q6FPga8D+SbNPW7ZHkT8c87kfbcfdOshlD23+/qlbeitpmOgY4PO1GeYabwH/Z1n0R2Kv9XiwFXgLcbY7jfAm4d4bHbZcmeRqwJ3DCbahprva7CPge8NYkmye5P3AocFzb7xPtWrbLcPP8ZSPHPAW4Osl/aTdjlyT5gyRr3YhdI8kzGNr3sXXLR3b/fdgkT2/13Q14GsMwjkYY8ItAVb0DeDXDDbZfM/R2XsrQA5/pQwxvfS8GzgT+z4z1zwJWtmGNFzH0fAHuBfxvhtA8GXh3VZ24oBdys48yBOn57XVkW340ww2+y1rdX1nHMY5guJl5FUPQfWbck1fVKcBzgX9o+3+bm3u3z2a4yXcmw03ITwE7jnncbwD/Ffg0Q291D+Cgde60/mN+FngbcHz7mZ0BPKGtuwz4S4b7C5czBPYKhn/gZx7ncuBJDO+SLgdeBzypHePW1rSu9juY4V3jJcBngb+tqq+3dUcw/G5ewPDz//DIMVczvHPcu62/jOEppWVzlHEkw7ujUzM8xXRtkmPasdbceH4Vw8/wdIZ2+++39lp7l3bDQtIil+Gx1lXAM6rqW9OuR4ufPXhpEUvy+CTbtiGh1zPch5j5rk2alQEvLW77MjzRchnDEMeBVfW76ZakDYVDNJLUKXvwktSpRfVlRzvssEMtX7582mVI0gbjtNNOu6yq7jzbukUV8MuXL2fFihXTLkOS
NhhJZn6C+SYO0UhSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SerU0mkXMOonF1/F8sO+OO0yFrWVRz1x2iVI2kDYg5ekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUxML+CT/kuRXSc6Y1DkkSXObZA/+A8B+Ezy+JGkdJhbwVXUScMWkji9JWrepj8EneWGSFUlWrL7uqmmXI0ndmHrAV9V7q2qfqtpnyZbLpl2OJHVj6gEvSZoMA16SOjXJxyQ/BpwM3CfJqiSHTupckqRbWjqpA1fVwZM6tiRp/RyikaROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SerU0mkXMGqvnZex4qgnTrsMSeqCPXhJ6pQBL0mdMuAlqVMGvCR1yoCXpE6NHfBJtkhyn0kWI0laOGMFfJInA6cDX2nzeyf5wiQLkyTNz7g9+DcBDwF+A1BVpwPLJ1OSJGkhjBvwN1bVVROtRJK0oMb9JOsZSZ4OLElyL+DlwPcmV5Ykab7G7cG/DLgfcD3wMeBq4JWTKkqSNH9j9eCr6jrgDcAbkiwBtqqq30+0MknSvIz7FM1Hk2yTZCvgp8DZSV472dIkSfMx7hDNnlV1NXAg8CXg7sCzJlaVJGnexg34TZJswhDwn6+qG4CaXFmSpPkaN+DfA6wEtgJOSrIbw41WSdIiNe5N1ncC7xxZdGGSP5tMSZKkhTBWwCfZDHgqw6dXR/d58wRqkiQtgHE/6PR54CrgNIZn4SVJi9y4Ab9LVe030UokSQtq3Jus30uy10QrkSQtqHF78A8HDklyAcMQTYCqqvtPrDJJ0ryMG/BPmGgVkqQFN9YQTVVdCGwLPLm9tm3LJEmL1LjfRfMK4DjgLu31kSQvm2RhkqT5GXeI5lDgoVX1W4AkbwNOBv5xUoVJkuZn3KdoAqwemV/dlkmSFqlxe/DHAt9P8lmGYD8AeP/EqpIkzdu430XzjiQnMjwuCfDcqvrhxKqSJM3buEM0a4Tha4IdnpGkRW7cp2j+BvggsB2wA3BskjdOsjBJ0vyMOwZ/MPCANf8Pa5KjgB8AR06qMEnS/Iw7RLMS2HxkfjPgvAWvRpK0YMbtwV8P/DTJ1xnG4B8LfDfJOwGq6uUTqk+SdBuNG/Cfba81Tlz4UiRJC2ncxyQ/uGY6yXbArlX144lVJUmat3GfojkxyTZJtgd+xPAUzTsmW5okaT7Gvcm6rKquBp4CHFtVDwIeM7myJEnzNW7AL02yI/CfgBMmWI8kaYGMG/BvBr4KnFdVpybZHThncmVJkuZr3JusnwQ+OTJ/PvDUSRUlSZq/cW+y3jvJN5Kc0ebv71cVSNLiNu4QzT8DhwM3ALRHJA+aVFGSpPkbN+C3rKpTZiy7caGLkSQtnHED/rIkezB8TQFJ/gK4dGJVSZLmbdyvKngJ8F7gvkkuBi4AnjGxqiRJ8zbuUzTnA49JshVDr/93wNOACydYmyRpHtY5RNO+nuDwJO9K8ljgOuA5wLkMH3qSJC1S6+vBfxi4EjgZeAHwOmBT4MCqOn3CtUmS5mF9Ab97Ve0FkOR9wGXA3avqmolXJkmal/U9RXPDmomqWg1cYLhL0oZhfT34P0xydZsOsEWbD1BVtc1Eq5Mk3WbrDPiqWnJ7FSJJWljjPgd/u/jJxVex/LAvTrsMSbrdrDzqiRM79rifZJUkbWAM
eEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1KmJBnyS/ZKcneTcJIdN8lySpLVNLOCTLAH+CXgCsCdwcJI9J3U+SdLaJtmDfwhwblWdX1X/FzgeOGCC55MkjZhkwO8MXDQyv6otW0uSFyZZkWTF6uuummA5krRxmWTAZ5ZldYsFVe+tqn2qap8lWy6bYDmStHGZZMCvAnYdmd8FuGSC55MkjZhkwJ8K3CvJPZJsChwEfGGC55MkjVg6qQNX1Y1JXgp8FVgC/EtV/XRS55MkrW1iAQ9QVV8CvjTJc0iSZucnWSWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqeWTruAUXvtvIwVRz1x2mVIUhfswUtSpwx4SeqUAS9JnTLgJalTBrwkdcqAl6ROGfCS1CkDXpI6ZcBLUqcMeEnqlAEvSZ0y4CWpUwa8JHXKgJekThnwktQpA16SOmXAS1KnDHhJ6pQBL0mdMuAlqVMGvCR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SepUqmraNdwkyTXA2dOuY5HbAbhs2kVsAGyn8dhO67fY22i3qrrzbCuW3t6VrMfZVbXPtItYzJKssI3Wz3Yaj+20fhtyGzlEI0mdMuAlqVOLLeDfO+0CNgC20Xhsp/HYTuu3wbbRorrJKklaOIutBy9JWiAGvCR1alEEfJL9kpyd5Nwkh027nklLsmuSbyU5K8lPk7yiLd8+ydeTnNP+3G5kn8Nb+5yd5PEjyx+U5Cdt3TuTpC3fLMnH2/LvJ1l+e1/nQkmyJMkPk5zQ5m2nGZJsm+RTSX7Wfq/2tZ3WluRV7e/bGUk+lmTz7tuoqqb6ApYA5wG7A5sCPwL2nHZdE77mHYEHtumtgX8H9gTeDhzWlh8GvK1N79naZTPgHq29lrR1pwD7AgG+DDyhLX8xcEybPgj4+LSvex7t9Wrgo8AJbd52umUbfRB4fpveFNjWdlqrfXYGLgC2aPOfAA7pvY0WQ8PvC3x1ZP5w4PBp13U7t8HngccyfIp3x7ZsR4YPft2iTYCvtnbbEfjZyPKDgfeMbtOmlzJ8Ei/Tvtbb0Da7AN8AHjUS8LbT2m20TQuvzFhuO918LTsDFwHbt/pPAB7XexsthiGaNQ2/xqq2bKPQ3sY9APg+cNequhSg/XmXttlcbbRzm565fK19qupG4CrgTpO4hgk7Gngd8P9GltlOa9sd+DVwbBvKel+SrbCdblJVFwN/D/wcuBS4qqq+RudttBgCPrMs2yie3UxyR+DTwCur6up1bTrLslrH8nXts8FI8iTgV1V12ri7zLKs+3Zi6C0+EPhfVfUA4LcMww1z2ejaqY2tH8Aw3LITsFWSZ65rl1mWbXBttBgCfhWw68j8LsAlU6rldpNkE4ZwP66qPtMW/zLJjm39jsCv2vK52mhVm565fK19kiwFlgFXLPyVTNQfA3+eZCVwPPCoJB/BdpppFbCqqr7f5j/FEPi2080eA1xQVb+uqhuAzwAPo/M2WgwBfypwryT3SLIpw82JL0y5polqd93fD5xVVe8YWfUF4Dlt+jkMY/Nrlh/U7tLfA7gXcEp7S3lNkj9qx3z2jH3WHOsvgG9WGxzcUFTV4VW1S1UtZ/i9+GZVPRPbaS1V9QvgoiT3aYseDZyJ7TTq58AfJdmyXdujgbPovY2mffOjXf/+DE+SnAe8Ydr13A7X+3CGt24/Bk5vr/0Zxuu+AZzT/tx+ZJ83tPY5m3bXvi3fBzijrXsXN386eXPgk8C5DHf9d5/2dc+zzR7JzTdZbadbts/ewIr2O/U5YDvb6RZtdATw
s3Z9H2Z4QqbrNvKrCiSpU4thiEaSNAEGvCR1yoCXpE4Z8JLUKQNekjplwGujk+RuSY5Pcl6SM5N8Kcm9F/D4j0zysIU6nnRbGfDaqLQPp3wWOLGq9qiqPYHXA3ddwNM8kuFTktJUGfDa2PwZcENVHbNmQVWdDnw3yd+17wr/SZKnwU298RPWbJvkXUkOadMrkxyR5Adtn/u2L497EfCqJKcnecTteG3SWpZOuwDpdvYHwGxfXvYUhk+D/iGwA3BqkpPGON5lVfXAJC8GXlNVz09yDHBtVf39glUt3Qb24KXBw4GPVdXqqvol8G3gwWPst+aL4k4Dlk+oNuk2MeC1sfkp8KBZls/2Va8AN7L235PNZ6y/vv25Gt8Ra5Ex4LWx+SawWZIXrFmQ5MHAlcDTMvz/r3cG/oThC6MuBPZs3yq4jOFbCNfnGob/ilGaKnsc2qhUVSX5j8DRGf6D998DK4FXAndk+H84C3hdDV/DS5JPMHxL4znAD8c4zb8Cn0pyAPCyqvrOgl+INAa/TVKSOuUQjSR1yoCXpE4Z8JLUKQNekjplwEtSpwx4SeqUAS9Jnfr/Ezb4Q/tow7oAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "data.loc[data.Region_Code==28.0,'Response'].value_counts().plot.barh(title='Class imbalance for region code 28')\n", + "plt.ylabel('Response')\n", + "plt.xlabel('Count')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There's still an imbalance in region coded as `28`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Check the ratio of previously insured, note down your observations" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pd.crosstab(data.Response, data.Previously_Insured).plot.barh(title='Interest for buying vehicle insurance if they already have a insurance')\n", + "plt.xlabel('Count')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight\n", + "- people who are not insured previsouly are mostly like to be interested" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How old are most of the vehicles? Does vehicle damage has any effect on the Response variable?" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "(data\n", + " .Vehicle_Age\n", + " .value_counts()\n", + " .plot.pie(startangle=50,\n", + " figsize=(8,8),\n", + " autopct=\"%0.1f%%\",\n", + " title='Distribution of customers based on their vehicle age')\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=data.Vehicle_Age,\n", + " hue=data.Response)\n", + "plt.title('Interest in buying vehicle insurance by vehicle age')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "More than half of the customer in the data have vehicles aged 1-2 years old and they are the ones who are mostly interested" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pd.crosstab(data.Response, data.Vehicle_Damage).plot.barh(title='Interest in buying vehicle insurance by previous vehicle damage')\n", + "plt.xlabel('count')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "- customer whose vehicle had past damages are mostly interested" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot a correlation matrix, remove the two least correlated features\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(12,10))\n", + "\n", + "sns.heatmap(data.corr(), annot=True, cmap='Spectral')\n", + "plt.title('Correlation Heat Map')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Previously_Insured -0.341170\n", + "Policy_Sales_Channel -0.139042\n", + "id -0.001368\n", + "Vintage -0.001050\n", + "Driving_License 0.010155\n", + "Region_Code 0.010570\n", + "Annual_Premium 0.022575\n", + "Age 0.111147\n", + "Response 1.000000\n", + "Name: Response, dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.corr()['Response'].sort_values()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 334399\n", + "1 46710\n", + "Name: Response, dtype: int64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_model = data.copy()\n", + "df_model.drop(['id', 'Vintage'], axis=1, inplace=True)\n", + "df_model.Response.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Split the data into train and test, to avoid data leakage" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "X = df_model.drop(['Response'],axis=1) \n", + "\n", + "y = df_model.loc[:,'Response']\n", + "\n", + "X_train, X_test , y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## One Hot Encoding the categorical variables" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "train = pd.get_dummies(data = X_train, 
columns= ['Gender', 'Vehicle_Damage', 'Vehicle_Age'], drop_first=True)\n", + "test = pd.get_dummies(data = X_test, columns= ['Gender', 'Vehicle_Damage', 'Vehicle_Age'], drop_first=True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Let's fit a baseline logistic regression model, calculate the accuracy and ROC AUC score, and print the classification report.\n", + "\n", + "## What are your observations? Are the results satisfactory?" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.8733698932066858\n" + ] + } + ], + "source": [ + "logisticRegression = LogisticRegression(max_iter=1000)\n", + "\n", + "logisticRegression.fit(train, y_train)\n", + "\n", + "predictions = logisticRegression.predict(test)\n", + "\n", + "print(accuracy_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.5038488355619435\n" + ] + } + ], + "source": [ + "print(roc_auc_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.88 1.00 0.93 66699\n", + " 1 0.31 0.01 0.02 9523\n", + "\n", + " accuracy 0.87 76222\n", + " macro avg 0.59 0.50 0.48 76222\n", + "weighted avg 0.81 0.87 0.82 76222\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_test, predictions))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Let us upsample the class with less data so that our model can learn about the minority class" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeDriving_LicenseRegion_CodePreviously_InsuredAnnual_PremiumPolicy_Sales_ChannelGender_MaleVehicle_Damage_YesVehicle_Age_< 1 YearVehicle_Age_> 2 YearsResponse
33280339115.0052906.055.001001
11624838111.0023038.026.011000
25500522130.0145318.0152.010100
31747423141.0129132.0151.000100
34421256148.002630.0154.011010
\n", + "
" + ], + "text/plain": [ + " Age Driving_License Region_Code Previously_Insured Annual_Premium \\\n", + "332803 39 1 15.0 0 52906.0 \n", + "116248 38 1 11.0 0 23038.0 \n", + "255005 22 1 30.0 1 45318.0 \n", + "317474 23 1 41.0 1 29132.0 \n", + "344212 56 1 48.0 0 2630.0 \n", + "\n", + " Policy_Sales_Channel Gender_Male Vehicle_Damage_Yes \\\n", + "332803 55.0 0 1 \n", + "116248 26.0 1 1 \n", + "255005 152.0 1 0 \n", + "317474 151.0 0 0 \n", + "344212 154.0 1 1 \n", + "\n", + " Vehicle_Age_< 1 Year Vehicle_Age_> 2 Years Response \n", + "332803 0 0 1 \n", + "116248 0 0 0 \n", + "255005 1 0 0 \n", + "317474 1 0 0 \n", + "344212 0 1 0 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.concat([train,y_train],axis=1)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeDriving_LicenseRegion_CodePreviously_InsuredAnnual_PremiumPolicy_Sales_ChannelGender_MaleVehicle_Damage_YesVehicle_Age_< 1 YearVehicle_Age_> 2 YearsResponse
11624838111.0023038.026.011000
25500522130.0145318.0152.010100
31747423141.0129132.0151.000100
34421256148.002630.0154.011010
2622930118.0135118.0152.010100
\n", + "
" + ], + "text/plain": [ + " Age Driving_License Region_Code Previously_Insured Annual_Premium \\\n", + "116248 38 1 11.0 0 23038.0 \n", + "255005 22 1 30.0 1 45318.0 \n", + "317474 23 1 41.0 1 29132.0 \n", + "344212 56 1 48.0 0 2630.0 \n", + "26229 30 1 18.0 1 35118.0 \n", + "\n", + " Policy_Sales_Channel Gender_Male Vehicle_Damage_Yes \\\n", + "116248 26.0 1 1 \n", + "255005 152.0 1 0 \n", + "317474 151.0 0 0 \n", + "344212 154.0 1 1 \n", + "26229 152.0 1 0 \n", + "\n", + " Vehicle_Age_< 1 Year Vehicle_Age_> 2 Years Response \n", + "116248 0 0 0 \n", + "255005 1 0 0 \n", + "317474 1 0 0 \n", + "344212 0 1 0 \n", + "26229 1 0 0 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# separating the two classes\n", + "df_majority = df[df['Response']==0]\n", + "df_minority = df[df['Response']==1]\n", + "\n", + "df_majority.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(37187, 11)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_minority.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(267700, 11)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_minority_upsampled = resample(df_minority,replace=True,n_samples=y_train.value_counts()[0],random_state = 123)\n", + "df_minority_upsampled.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(267700, 11)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_majority.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(535400, 
11)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "balanced_df = pd.concat([df_minority_upsampled,df_majority])\n", + "balanced_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "balanced_df = shuffle(balanced_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Split the predictors and the target variables" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "X_train = balanced_df.drop('Response',axis=1)\n", + "y_train = balanced_df['Response']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Again fit a logistic regression model, find accuracy, auc_roc score and observe the results, have they improved?\n", + "## What are your observations?" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.6439741806827425\n" + ] + } + ], + "source": [ + "logisticRegression2 = LogisticRegression()\n", + "\n", + "logisticRegression2.fit(X_train, y_train)\n", + "\n", + "predictions = logisticRegression2.predict(test)\n", + "\n", + "print(accuracy_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.7841039196693164\n" + ] + } + ], + "source": [ + "print(roc_auc_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.99 0.60 0.75 66699\n", + " 1 0.26 0.97 0.41 9523\n", + "\n", + " accuracy 0.64 76222\n", + " macro avg 0.62 0.78 0.58 76222\n", + "weighted avg 0.90 0.64 0.70 76222\n", + "\n" + ] + } + 
], + "source": [ + "print(classification_report(y_test, predictions))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Upsampling of minority class improved the model performance and lifted (increased) auroc, recall and f1-scores" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Video_game_sales_analysis/notebook/Video_Game_Sales-MK.ipynb b/Video_game_sales_analysis/notebook/Video_Game_Sales-MK.ipynb new file mode 100644 index 0000000..a3f60f8 --- /dev/null +++ b/Video_game_sales_analysis/notebook/Video_Game_Sales-MK.ipynb @@ -0,0 +1,2246 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Exploring Video Game Sales\n", + "\n", + "This dataset contains a list of video games with sales greater than 100,000 copies.\n", + "\n", + "In this project I will deal only with exploratory analysis, where the objective is to understand how the data is distributed and generate insight for future decision-making. This analysis aims to explore the data as much as possible in a simple, intuitive and informative way. The data used in this project contains information only from 1980 to 2016." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Data information : \n", + "- Rank - Ranking of overall sales, integer\n", + "- Name - The game's name\n", + "- Platform - Platform of the game's release (i.e. 
PC,PS4, etc.), object\n", + "- Year - Year of the game's release, float\n", + "- Genre - Genre of the game ,object\n", + "- Publisher - Publisher of the game, object\n", + "- NA_Sales - Sales in North America (in millions), float\n", + "- EU_Sales - Sales in Europe (in millions), float\n", + "- JP_Sales - Sales in Japan (in millions), float\n", + "- Other_Sales - Sales in the rest of the world (in millions), float\n", + "- Global_Sales - Total worldwide sales, float\n", + "\n", + "## Source:\n", + "- Hosted on [Kaggle](https://www.kaggle.com/gregorut/videogamesales), web [scraped](https://github.com/GregorUT/vgchartzScrape) from [VGChartz.com](http://www.vgchartz.com/gamedb/)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Import necessary libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "pd.set_option('display.max_columns', None)\n", + "\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "import seaborn as sns\n", + "sns.set_style('whitegrid')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See many other pandas options [here](https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html#available-options)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Read the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankNamePlatformYearGenrePublisherNA_SalesEU_SalesJP_SalesOther_SalesGlobal_Sales
01Wii SportsWii2006.0SportsNintendo41.4929.023.778.4682.74
12Super Mario Bros.NES1985.0PlatformNintendo29.083.586.810.7740.24
23Mario Kart WiiWii2008.0RacingNintendo15.8512.883.793.3135.82
34Wii Sports ResortWii2009.0SportsNintendo15.7511.013.282.9633.00
45Pokemon Red/Pokemon BlueGB1996.0Role-PlayingNintendo11.278.8910.221.0031.37
\n", + "
" + ], + "text/plain": [ + " Rank Name Platform Year Genre Publisher \\\n", + "0 1 Wii Sports Wii 2006.0 Sports Nintendo \n", + "1 2 Super Mario Bros. NES 1985.0 Platform Nintendo \n", + "2 3 Mario Kart Wii Wii 2008.0 Racing Nintendo \n", + "3 4 Wii Sports Resort Wii 2009.0 Sports Nintendo \n", + "4 5 Pokemon Red/Pokemon Blue GB 1996.0 Role-Playing Nintendo \n", + "\n", + " NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales \n", + "0 41.49 29.02 3.77 8.46 82.74 \n", + "1 29.08 3.58 6.81 0.77 40.24 \n", + "2 15.85 12.88 3.79 3.31 35.82 \n", + "3 15.75 11.01 3.28 2.96 33.00 \n", + "4 11.27 8.89 10.22 1.00 31.37 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"../data/vgsales.csv\")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 16598 entries, 0 to 16597\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Rank 16598 non-null int64 \n", + " 1 Name 16598 non-null object \n", + " 2 Platform 16598 non-null object \n", + " 3 Year 16327 non-null float64\n", + " 4 Genre 16598 non-null object \n", + " 5 Publisher 16540 non-null object \n", + " 6 NA_Sales 16598 non-null float64\n", + " 7 EU_Sales 16598 non-null float64\n", + " 8 JP_Sales 16598 non-null float64\n", + " 9 Other_Sales 16598 non-null float64\n", + " 10 Global_Sales 16598 non-null float64\n", + "dtypes: float64(6), int64(1), object(4)\n", + "memory usage: 1.4+ MB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Task 1. 
Which genre have the most games been made for?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Top Game Genre')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaoAAAESCAYAAACsFpHuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deVxU9f7H8dcMi6CQS6hYLiHmHmpZaqntRrhdF9zHzG6mXa+ZKZhKUWouWGRmJKmpaCAS9nCtLLpamktqikqauOKSG2qorDO/P/w5SahRDswMvJ9/OTPnnPl8Z9J333POfD8Gi8ViQURExEEZ7V2AiIjIrSioRETEoSmoRETEoSmoRETEoSmoRETEoSmoRETEobnauwARZzNhwgS2bNkCQGpqKnfffTceHh4ALF682Prnf+rYsWPMmDGD7du34+rqSm5uLoGBgQwdOhQ3N7fbrv/vSk9P58MPP+SHH37Azc2NzMxM2rRpw2uvvYaXl1ex1yOlj0G/oxL555544gmmT5/OfffdZ5Pj/fbbb3Tr1o1hw4YRHByMwWDg0qVLjB49mqpVqzJu3DibvE9hZWRk0K1bNzp27MhLL72Em5sb2dnZTJkyhWPHjvHxxx8Xaz1SOmlGJWJDM2fOZOXKlbi4uODn50dYWBiVK1fGZDLRsGFDtm7dSnp6Op07d2bYsGEF9o+OjqZdu3b06NHD+ly5cuUICwvjq6++AuDy5cuEh4dz+PBhzp8/T7ly5Zg2bRq1a9fGZDLRqFEjfv75Z86dO0ePHj04c+YMmzdv5sqVK7z//vvUq1eP33//nYkTJ7Jv3z5ycnJo1aoVISEhuLrm/ychPj6ee+65h6FDh1qfc3d3JyQkhDlz5mA2mzEajSQlJREVFUVOTg4eHh6EhobSrFkzZsyYwbFjxzh9+jTHjh2jatWqREREUKVKFZ544gkCAgLYu3cvI0aMICAggLfffpsTJ06Qk5ND+/btGTx4cBF9U+JMdI1KxEY+//xzvv/+exISEli+fDn33nsvo0ePtr5+8OBBYmNjWbp0KatWreK7774rcIyffvqJ1q1bF3i+SpUqmEwmANatW8cdd9zB4sWL+eqrr2jcuDGLFi2ybnvs2DHi4uKIiIggIiKChx56iMTERNq0acPChQsBeOedd2jUqBGJiYl88cUXpKen8+mnnxa6njJlyvDyyy9jNBo5dOgQkZGRREdH88UXXzB+/Hj++9//cvnyZesxpk+fzpdffomnpydxcXHW49x7772sXr2ap59+mlGjRtGtWzcSExNJSEhgw4YNrFq1qrAfv5RgmlGJ2Mi6devo2rUrZcuWBaB///58/PHHZGdnA9CzZ0/c3Nxwc3MjMDCQH374gccffzzfMSwWCwaDwfp49uzZLF++HIAzZ86wcuVKAgMDqVGjBjExMRw+fJjNmzfTrFkz6z5PP/00ADVq1ACgTZs2ANSsWZPNmzcD8L///Y/k5GQSEhIAyMzMvOGY/lzPsmXLmDNnDgDnzp3jk08+YevWrZw6dYoBAwZYtzMYDBw5cgSAhx56yHotq2HDhly4cMG6XfPmzYGrs8QtW7Zw4cIFpk+fb
n3ul19+ISgo6KafuZQOCioRGzGbzfn+UTebzeTm5lofX39azWKxYDQWPKHRrFkzNm/ebA2wf//73/z73/8GoF69epjNZj777DPi4+Pp27cvHTt2pEKFCqSlpVmP4e7unu+YN7oBw2w2M336dPz9/QG4ePFivtr/XE+/fv0A6NSpE506dQKuXp/LycnBbDbTqlUr3n//fet+J06coEqVKqxZsybfzSUGg4HrL4tfC3Wz2YzFYiEuLg5PT0/gahCWKVOmQE1S+ujUn4iNtGnThs8//9x6yismJoYHH3zQGhzLli3DbDZz4cIFVq9ezRNPPFHgGEOGDGH16tV88cUX5OXlAZCbm2s9BWY0Gvnhhx/o0qULwcHB+Pn5kZSUZN22sFq3bs28efOwWCxkZ2czZMgQ62nB6/Xp04f9+/cze/Zs68zQbDbzww8/cP78eVxcXGjVqhXr168nNTUVgLVr19KpU6ebztJuxMvLi6ZNm1pPP168eJHevXvz7bff/q1xScmkGZWIjXTv3p0TJ04QHByM2WymVq1aTJs2zfp6ZmYm3bt359KlS/Tp04dWrVoVOIavry+LFy/mww8/tJ5iu3TpEk2bNiU+Pp4KFSowcOBA3njjDetpu6ZNm7Jv376/VevYsWOZOHEiHTt2JCcnh4cfftg6c7uel5cXcXFxREVF0b17d+BqiDRo0IDp06fTsGFDAN5++21GjBiBxWLB1dWVqKgoypUr97dqmjZtGuPHj6djx45kZ2fToUMH6+xNSjfdni5SDEwmE3379iUwMNDepYg4HZ36ExERh6YZlYiIODTNqERExKEpqERExKHprj8b27Ztm/V3IM4oKyvLqX+7ovrtS/Xbn7OOISsri6ZNm97wNQWVjRkMBho0aGDvMv6xlJQU1W9Hqt++nL1+cN4xpKSk3PQ13UxhY3t276Zho0b2LkNEpFhZcvMwuLr84/1vFbCaUdmYwWjkdFTBX/iLiJRklYf0K7Jj62YKERFxaAoqERFxaAoqERFxaCUuqKKjo2ndujVZWVk33Wbv3r1s2bIFgFdffdW6KrSIiDieEhdUy5cvJygoiJUrV950m6+//pr9+/cDEBkZWaB/j4iIOI4Sddffpk2bqFmzJr169WLUqFF07dqVHTt2MHHiRCwWC1WrViUsLIylS5fi5uZGo0aNGD58OKtXr+b06dOMHTuW3NxcDAYD48aNo379+rRr147777+fgwcPcueddzJjxgxcXP75LZgiIvL3lKigWrJkCcHBwdSuXRt3d3d27NhBWFgYkZGR+Pv7s2jRIs6cOUOXLl3w8fEhICDAuu/UqVMxmUw89dRTpKSkMGbMGBITEzl69Cjz58+nWrVq9OrVi+Tk5Jv+elpERGyvxATVhQsXWLduHefOnSMmJoaMjAwWLlzI2bNnre22+/btC0BSUlKB/VNTU3nwwQcBaNCgASdPngSgYsWKVKtWDYBq1ard8tqXiIjYXokJqmXLltGtWzdCQ0MBuHLlCk8++SQeHh4cOnSIe+65h+joaPz8/DAYDJjN5nz7+/v789NPP/Hkk0+SkpKCj48PcHVJJBERsZ8SE1RLlixh6tSp1seenp60a9cOHx8fxowZg9FopHLlygwYMAA3NzemTp1qnWkBhISEEBYWxty5c8nNzWXixIn2GIaIiPyJ1vqzsZSUFHz+t9XeZYiIFKvbXULpVmv9lbjb00VEpGRRUImIiEMrMdeoHIXFbC7SVYRFRBzR7bb5uBXNqGwsy8mXY7pV8zJnoPrtS/Xbn73GUFQhBQoqERFxcAoqGyvj5OsGOmML6+upfvtylPotuc59ZkPy0zUqGzMYjRz5oLu9yxAp1WoOS7B3CWJDmlGJiIhDU1CJiIhDU1CJiIhDKxHXqKKjo9mwYQNGoxGDwcCrr75K48aN//HxFi5cSL9++i2UiIgjcPqg2r9/P0lJScTGxmIwGEhJSSE0NJRly5b942NGRUUpqEREHITTB1WlSpU4f
vw4CQkJtG3blgYNGpCQkIDJZMLPz4+DBw9isViIjIykcuXKTJ48ma1bry4a26FDB5577jlGjx7N+fPnOX/+PI8++igXLlwgPDyc5557jtdffx1XV1dcXFyYOnUqVatWtfOIRURKF6e/RlWpUiWioqLYtm0bPXv2JDAwkO+++w6A+++/n5iYGJ599llmzZrFd999R1paGvHx8Xz22WesWLGCvXv3AtCyZUvi4uIYMmQI5cuXJzw8nA0bNtCoUSM+/fRTBg8ezIULF+w5VBGRUsnpZ1SHDx/Gy8uLSZMmAZCcnMygQYPw8fGhZcuWwNXASkpKwtfXl+bNm2MwGHBzc6NJkyakpqYC4OfnV+DY3bt355NPPuHf//433t7evPrqq8U3MBERAUrAjGrv3r2Eh4dbW8T7+fnh7e2Ni4sLu3btAmDbtm3UqVMHf39/62m/nJwctm/fTq1atYD8nXyvtej69ttveeCBB5g/fz6BgYHMnj27OIcmIiKUgBlVu3btSE1NJTg4mLJly2KxWAgJCWH+/PksXbqUefPm4enpydSpU6lYsSKbN2+mZ8+e5OTkEBgYSKNGjQoc09/fn5EjRzJs2DBGjRrFjBkzMBqNvP7663YYoYhI6VZiO/yaTCbCw8PztZsvDikpKZRbE1as7yki+f3TJZRu1WXWWTjrGNThV0REnJbTn/q7mZiYGHuXICIiNlBig8peLGazVm4WsTNLbjYGV+duuSN/0Kk/G1OHX/tS/fblKPUrpEoWBZWIiDg0BZWNuavDr12p/pvLU9dbcVK6RmVjRqORr+YE2bsMkQKeeWGVvUsQ+Uc0oxIREYemoBIREYemoBIREYdWKoJq06ZN1KtXj1Wr8p+j79ixI6NHj2bo0KF2qkxERP5KqQgqgNq1a7NixQrr471793LlyhUAPvzwQ3uVJSIif6HUBFX9+vU5ceIEFy9eBGDZsmV07NgRgEceeQSARYsWERwcTM+ePZkyZQoAhw4dol+/fvTs2ZPnnnuOc+fO2WcAIiKlVKkJKoCnn36aNWvWYLFY2LlzJ82aNcv3emJiImPHjmXx4sXUqFGD3NxcpkyZwqBBg1i8eDE9e/Zkz549dqpeRKR0KlVB1bFjR1atWsWWLVto3rx5gdcnTZpEXFwc/fr14/jx41gsFg4ePGgNtKCgIFq3bl3cZYuIlGqlKqhq1KjB5cuXiYmJoVOnTgVej4+P56233mLhwoWkpKSwfft2/P39SU5OBq6eLtSq7CIixatUBRVcnRWdOHECPz+/Aq/Vq1eP7t27079/fypVqkSTJk0ICQlh1qxZmEwmli9fbr2uJSIixaPEdvi1l5SUFI5seM3eZYgUUBxLKDlrd9lrnL1+cN4xqMOviIg4LQWViIg4NK2ebmNms1mrVItDysvNxkUNBcUJaUZlY9nq8GtXqv/mFFLirBRUIiLi0BRUIiLi0BRUNqZW9PblzPXn5jn3aWORoqKbKWzMaDQyK+YZe5chTugl01f2LkHEIWlGJSIiDk1BJSIiDq1IgmrTpk20atUKk8mEyWSia9euDBs27Ka3bo8ePZp169YV6tiJiYk89thj1mP37NnT2rn3iSeeICsr62/Vqu6+IiKOrciuUbVs2ZLIyEjr49dee42kpCQCAwNv+9gdOnRg5MiRAJw/f55OnTrx7LPP/qNjqbuviIhjK5abKbKzszl16hTly5dn8uTJbN26FbgaOM8995x1u5ycHN58800OHz6M2Wxm+PDhtGjR4pbH/v333/Hw8MBgMFif27dvH5MnT8ZsNnPx4kXGjRvH5cuXiY+P54MPPgCgV69efPDBB3Tp0oX169djMpmoX78+v/76KxkZGUyfPp27776bmTNn8s0331CpUiWuXLnCK6+88pc1iYiI7RRZUG3cuBGTycTZs2cxGo306NGDzMxM0tLSiI+PJzc3lz59+tCyZUvrPkuWLKFixYq88847pKen069fP1auXFng2CtWrGDHjh0YD
AY8PT2ZOnVqvtf3799PaGgo9erVY/ny5SQmJjJ+/HgmTJjAhQsXOH36NBUrVqRKlSr59gsICGDs2LFERkaycuVK2rZty/fff09CQgI5OTlq8SEiYgdFfuovPT2dgQMHUr16dVJTU2nevDkGgwE3NzeaNGlCamqqdZ99+/axdetWdu7cCUBubi47duxg2rRpAHTq1AkXF5d8p/5upEqVKnz00Ud4eHhw6dIlvLy8MBgMdOrUiRUrVpCWlkb37t0L7NewYUMAfH19OXPmDKmpqdx33324uLjg4uJC48aNbfkRiYhIIRT5XX8VK1YkIiKCcePG4ePjYz3tl5OTw/bt26lVq5Z129q1a9O+fXtiYmL45JNPCAwM5L777iMmJoaYmBiCg4ML9Z4TJ05k2LBhTJkyhbp163Kt5Va3bt348ssv2bJlC48++uhfHqdOnTokJydjNpvJzs5mz549/+ATEBGR21Es16jq1KmDyWQiKSmJ6tWr07NnT3JycggMDKRRo0bW7Xr16sW4cePo168fGRkZ9OnTB6Px72dpp06dePnll7nzzjvx9fUlPT0dgKpVq1KuXDmaNm2Kq+tfD71evXo8+uij9OjRg4oVK+Lm5lao/URExHZKXYffl156iTFjxuSbyd3M2bNn+fLLL+nbty/Z2dm0b9+e+fPnc9ddd910n5SUFNb9NNyWJUsp8ZLpK6ftznqN6rc/Zx3DreouNdODzMxM+vTpQ5s2bQoVUnD1tOWuXbvo1q0bBoOB4ODgW4aUiIjYXqkJKg8PDxITE//WPkajkUmTJhVRRSIiUhilJqiKi9ls1uKi8o9o9XSRG9NafzamDr/25cz1u7o4d4sYkaKioBIREYemoBIREYemoLIxdfi1L3vVn6PrSyJFRjdT2JjRaOT5pbe/Qrw4l0+7fGnvEkRKLM2oRETEoSmoRETEoTlVUEVHRzNgwAAGDhzICy+8wK5duzCZTPlWYP8n1qxZw2+//WajKkVExJac5hrV/v37SUpKIjY2FoPBQEpKCqGhoZQvX/62j71gwQLCw8OpWrWqDSoVERFbcpqgqlSpEsePHychIYG2bdvSoEEDEhISeOGFF5g5cyZnzpzhypUrvPfee9SoUeOGnYTT0tIYO3Ysubm5GAwGxo0bx8mTJ62h99lnn7F48WJWrFiBwWAgKCiI/v37M3r0aM6fP8/58+eZNWuWTcJRREQKx6mCKioqioULFzJz5kw8PDx49dVXAXj00Ufp3LkzM2bM4Msvv6ROnTo37CQ8c+ZMTCYTTz31FCkpKYwZM4bExEQaNGhAeHg4R44cYdWqVXz22WcYDAYGDBhA69atgauNIAcMGGDHT0BEpHRymqA6fPgwXl5e1kVik5OTGTRoED4+PtbOuz4+PtbOvDfqJJyamsqDDz4IXP29zcmTJ/O9x759+zh+/Lg1kC5cuMCRI0cA8PPzK6aRiojI9ZzmZoq9e/cSHh5OVlYWcDU4vL29cXFxKbCtv7//DTsJ+/v789NPPwFX14Tz8fEBwGAwYLFYqF27NnXq1GHBggXExMTQtWtX6tata91GRESKn9PMqNq1a0dqairBwcGULVsWi8VCSEgI8+fPL7Dt448/zubNmwt0Eg4JCSEsLIy5c+eSm5vLxIkTAWjWrBkhISHMnTuXVq1a0bt3b7KzswkICNANFiIidlbqOvwWtZSUFKb+8qq9y5BiZquVKZy1O+s1qt/+nHUMt6rbaU79iYhI6aSgEhERh+Y016ichdls1gKlpVBOXjZuanwoUiQ0o7Ixdfi1L3vVr5ASKToKKhERcWgKKhERcWgKKhtTh1/7Ku76s/Nyi/X9REoj3UxhY0ajkfZLI+xdhhSTlV1G2bsEkRJPMyoREXFoCioREXFoDhNU0dHRtG7d2rro7PViY2OZMWOGTd5H3XxFRJxLoYPq0KFDrF27lpMnT1IUywMuX76coKAgVq5cafNjX2/BggVkZGQU6XuIiIjtFOpmioULF
7JmzRouXLjAv/71L44cOcIbb7xhsyI2bdpEzZo16dWrF6NGjaJr16789NNPvPPOO5QvXx6j0UjTpk1ZsGABFy9eZOjQoWRnZ9OpUyeWLVt206687u7uHDt2jFOnTjF58mROnz5t7eYbERFBaGgo8fHxAPTo0YP33nuPpUuXsn37di5fvszEiRPZsGFDgWOLiEjxKdSMauXKlcybNw9vb28GDBjAjh07bFrEkiVLCA4Opnbt2ri7u7Njxw4mTZrEu+++y6effkr16tUB6Ny5M6tXr8ZisfDtt9/y+OOP5+vK+9lnn/HNN99w4MABAO666y7mzJmDyWRi8eLFPPbYYzRo0IApU6bg5uZ203pq165NXFwcFovlpscWEZHiUagZ1bVTfdeaB9ryt0IXLlxg3bp1nDt3jpiYGDIyMli4cCG//fabtavu/fffz5EjRyhfvjwNGjRg69atLF26lNDQUPbu3XvTrrzXflPj6+vLtm3bCjVG+KOb7806/tauXdtm4xcRkVsrVFB16NCBvn37cvz4cV588UWeeuopmxWwbNkyunXrRmhoKABXrlzhySefxMPDg9TUVPz9/UlOTqZ8+fLA1VN08+fPJzMzE39/f3JycqhTpw6zZ8/GYDAwb9486taty5dffnnDrrzXuvmWKVOGs2fPkpeXx6VLl0hLS7NuYzRenWhe6/j752OLiEjxKVRQPfzww7Rq1Yp9+/bh5+dH/fr1bVbAkiVLmDp1qvWxp6cn7dq1w9fXl9DQUMqVK0e5cuWsQfXQQw8RFhbGkCFDAKhfv/7f6sp7fTffRx55hO7du1OzZk1q1apVYNu/e2wREbG9QnX47d27N7GxscVRj9NLSUlh5C8r7F2GFBNbr0zhrN1Zr1H99uesY7hV3YWaUZUtW5Z33nkHPz8/62mxnj172q5CERGRmyhUUDVr1gyAs2fPFmkxIiIif1aooBo6dChnz5694aoRkp/ZbNZCpaVIdl4u7i5a21mkKBXqb9hbb73F2rVrqVKlChaLBYPBQFxcXFHX5pRKQodfZzy/fU1x16+QEil6hfpbtmPHDr755hvr9SkREZHiUqjkqVWrlk77iYiIXRRqRnXixAkef/xx62+NdOrv5tTh176Kuv7svDzcXVyK9D1EJL9CBdW7775b1HWUGEajkQ4Ji+xdhhSRFd372rsEkVKnUEHl6upKREQE6enpPPPMM9SrV4+77767qGsTEREp3DWqsLAwunXrRnZ2Ns2bN2fixIlFXZeIiAhQyKDKysqiVatWGAwGateuTZkyZYq6rkLZtGkTrVq1wmQyYTKZ6Nq1K8OGDftbt4hHR0ezc+fOIqxSRERuR6FO/bm7u/P9999jNpv5+eefHeqGgZYtWxIZGWl9/Nprr5GUlERgYGCh9h80aFBRlSYiIjZQqKAaP348o0ePZvfu3URGRjrsqb/s7GxOnTpF+fLlGTt2LCdPniQ9PZ22bdsyfPhwDh06xLhx48jJycHDw4PIyEimTp1KUFAQZ86cYe3atWRmZnLkyBFefPFFunbtys6dO3nrrbcoV64cd955J2XKlGHy5Mn2HqqISKlxy1N/+/fvp3///vj6+nLy5Enq1q3LoUOHOHjwYHHV95c2btyIyWQiKCiIrl278vTTT1OjRg2aNm3KnDlziI2Nta78PmXKFAYNGsTixYvp2bMne/bsyXesjIwMZs2aRVRUFNHR0QC8+eabTJ48mQULFlCzZs1iH5+ISGl3yxnVtGnTGDXq6rp1lStXJiYmhsOHDzNu3DjatGlTLAX+lWun/tLT0xk4cCDVq1enQoUKJCcns3HjRry8vKzXrA4ePGhdYDcoKAiAFSv+aMlxrc9WtWrVrPucOnWKe++9F4AHHniAVatWFdvYRETkL2ZUV65c4b777gPA29sbuLpKRW5ubtFX9jdVrFiRiIgIxo0bx7x58/D29ubdd99l4MCBZGZmYrFYrN2C4Wpn4ZiYmHzHuFFHYF9fX/bv3
w9cXUpKRESK1y1nVNcvm/TRRx/9sZOrYy7EWadOHUwmEykpKRw8eJCtW7fi6elJrVq1OHXqFCEhIbzxxhtERUXh4eFBREQEu3fvvuUx33zzTcaMGUPZsmVxc3NTh18RkWJ2y8SpUqUKO3fuJCAgwPrczp07qVy5cpEXVhgtWrSgRYsW+Z671qL+ZubPn5/v8Y1ujChTpgxJSUkAJCcn8/HHH1OpUiUiIyNxc3O7zapFROTvuGVQjRo1ipdffpmWLVtSq1Ytjh49yo8//sjHH39cXPXZ3Z133snAgQMpW7Ys3t7euuNPRKSY3TKoatSowZIlS0hKSiItLY3GjRvzyiuvULZs2eKqz+4CAwML/ZssERGxvb+82OTh4WG9Q07+mtls1sKlJZhWTxcpfuqEaGMlocOvMyvq+hVSIsVPQSUiIg5NQSUiIg5NQWVj7u6OsbL8P+WIHX6z8/LsXYKI2JFj/nLXiRmNBv6V8K29yyhRvuj+pL1LEBE70oxKREQcmoJKREQcmoJKREQcmsMH1Z/bzffo0YOYmBhMJhOpqak33W/Lli388ssvACxatIjOnTurRYeIiBNyipsprm83n52dTWBgoLXtyM18/vnnBAUFUb9+fdasWcPUqVOpV69ecZQrIiI25BRBdb2MjAyMRiMu/79CwMmTJwkPDycrK4vz58/zn//8B19fX77//nt2797Nzp072bVrF2PHjiUyMpI1a9awcuVKXF1dad68OaNGjWLGjBls376dy5cvM3HiREaPHk21atVIS0ujffv2/Prrr+zZs4fHHnuMESNG2PkTEBEpXZwiqK61mzcYDLi5uREWFsbs2bMBOHDgAM8//zwtWrRg27ZtzJgxg08//ZQ2bdoQFBRE27Zt2bRpE+Hh4Vy+fJnVq1cTFxeHq6sr//3vf/nuu+8AqF27NuPGjSMtLY2jR48yd+5cMjMzefLJJ1m3bh2enp48/vjjCioRkWLmFEF1/am/a64FVeXKlYmKiiIhIQGDwXDL7sMHDhygSZMm1p5SzZs359dffwXAz8/Pul2NGjXw9vbG3d0dHx8fKlSoANy4A7CIiBQth7+Z4q9Mnz6dzp07ExERQYsWLbBYLMDVULn252tq167Nzp07yc3NxWKxsGXLFmtAGY1/fBQKJBERx+EUM6pbCQwMZOLEicyaNYtq1aqRnp4OQJMmTZg2bRrVq1e3bluvXj2effZZevfujdls5oEHHuCpp56y3h0oIiKOx2D587RDbktKSgqv7z5u7zJKlL+zhFJKSopDrldYWKrfvpy9fnDeMdyqbqc/9SciIiWbgkpERBya01+jcjRms0WrfduY2r+LlG6aUdlYdnaWvUu4LY7Yil4hJVK6KahERMShKahsTB1+bScnTzekioiuUdmc0Whg2NKj9i6jRPigSw17lyAiDkAzKhERcWgKKhERcWgKKhERcWh2Daro6GgGDBjAwIEDeeGFF9i1axcTJ07k+PF/vgTRjBkziI2NvQNryo4AABK6SURBVOnrx48fJykpCeC230tERIqe3W6m2L9/P0lJScTGxmIwGEhJSSE0NJRly5YV6ftu3LiRAwcO8MQTTzB27NgifS8REbl9dguqSpUqcfz4cRISEmjbti0NGjQgISEBk8lEeHg4q1at4vDhw6Snp3PhwgX69OnD119/zcGDB5kyZQo+Pj6MGDGC+Ph4AHr06MF7771nPX5eXh5vvPEGJ0+eJD09nbZt2/Lf//6X6OhoMjMzadasGfPmzSM8PJzKlSszatQoMjIyyMvL45VXXqFVq1Z07NiRhx56iL1792IwGPjoo4/w9va210cmIlIq2e3UX6VKlYiKimLbtm307NmTwMBAa7fdazw8PJgzZw7t2rVj7dq1fPzxxwwaNIiVK1f+5fFPnDhB06ZNmTNnDrGxscTGxuLi4sKgQYPo0KEDTz75xzJHUVFRPPzwwyxatIjp06czduxYzGYzly5don379ixcuJAqVaqwb
t06m38OIiJya3abUR0+fBgvLy8mTZoEQHJyMoMGDcLHx8e6TcOGDQHw9vamTp06AJQvX56srILLFP25W0mFChVITk5m48aNeHl5kZ2dfdNaUlNT6dixIwBVq1bFy8uLc+fO5auhWrVqN3xfEREpWnabUe3du5fw8HDrP/5+fn54e3vjct26brfqtFumTBnOnj1LXl4eFy9eJC0tLd/riYmJeHt78+677zJw4EAyMzOxWCwYjUbMZnO+bf39/fnpp58A+O2337h48aLaz4uIOAi7zajatWtHamoqwcHBlC1bFovFQkhICPPnzy/U/pUrV+aRRx6he/fu1KxZk1q1auV7vVWrVowYMYKtW7fi6elJrVq1OHXqFHXr1iUqKopGjRpZt33ppZcYM2YMX331FZmZmbz99tu4umrRDhERR6AOvzaWkpJC1C9e9i6jRPgnSyg5a3fTa1S/fTl7/eC8Y1CHXxERcVoKKhERcWi6EGNjZrNFq37bSE6eBTcX3cwiUtppRmVj6vBrOwopEQEFlYiIODgFlY2pw69t5Km7r4j8P12jsjGj0cDqxWfsXYbTe7anz19vJCKlgmZUIiLi0BRUIiLi0BRUIiLi0JzmGlVaWhqdOnXKt0ZfixYtABg6dOgN90lMTOTAgQOMHDky3/NbtmzB29ub+vXrM3ToUD788MOiK1xERG6L0wQVQJ06dYiJibnt43z++ecEBQVRv359hZSIiINzqqD6s02bNhEXF0dkZCRLlixh0aJFlC9fHjc3N4KCggDYsWMHAwcO5Ny5c/Tu3ZtGjRrx/fffs3v3burUqUNwcDDr16/HZDJRv359fv31VzIyMpg+fTp33303M2fO5JtvvqFSpUpcuXKFV155xTqTExGRoudUQbV//35MJpP1cXBwMADnzp1j9uzZfPHFF7i7u9O/f3/rNq6ursyZM4djx44xaNAgVq1aRZs2bQgKCuKuu+7Kd/yAgADGjh1LZGQkK1eupG3btnz//fckJCSQk5Njba4oIiLFx6mC6s+n/jZt2gTAkSNH8Pf3x9PTE4BmzZpZt2nYsCEGg4HKlSuTmZl5y+Nf6+br6+vLmTNnSE1N5b777sPFxQUXFxcaN25s6yGJiMhfKBF3/dWsWZMDBw6QmZmJ2Wxm586d1tdu1KHXYDAUaF1/I3Xq1CE5ORmz2Ux2djZ79uyxad0iIvLXnGpGdTOVKlXixRdfpE+fPlSoUIGsrCxcXV3Jzc294fZNmjRh2rRpVK9e/ZbHrVevHo8++ig9evSgYsWKuLm5qfOviEgxc5p/datXr058fHy+51q0aEGLFi3Izc3l1KlTJCYmAtC3b1+qVavGgw8+aN22TJkyJCUlAdCrVy969eoFwPr16wHynVLs3bs3AGfPnuWOO+4gISGB7Oxs2rdvT7Vq1YpukCIiUoDTBNWtuLq6cuXKFbp06YKbmxsBAQE0b978to9bsWJFdu3aRbdu3TAYDAQHBxe4AUNERIpWiQgqgBEjRjBixAibHtNoNDJp0iSbHlNERP6eEhNUjsJstmjlbxvIy7PgosaJIkIJuevPkajDr20opETkGgWViIg4NAWVjZVRh18AzLnq0CsitqFrVDZmMBo49P5Je5dhd/cM97V3CSJSQmhGJSIiDk1BJSIiDk1BJSIiDs0prlFFR0ezYcMGjEYjBoOBV199FTc3Ny5evJhvmaRbub6rr4iIOA+HD6r9+/eTlJREbGwsBoOBlJQUQkNDefrpp/Hx8Sl0UF3f1VdERJyHwwdVpUqVOH78OAkJCbRt25YGDRoQFRWFyWTCzc2NRo0aMWbMGO655x7c3d0JCQkhPDycrKwszp8/z3/+8x98fX3zdfXdsWMH8+bNw2g08sADDzBy5EjOnTvHyJEjyc7Oxs/Pj40bNxIdHc2oUaNISEgAYPjw4QwcOJCAgAA7fyoiIqWHUwRVVFQUCxcuZObMmXh4ePDqq6/SpUsXfHx8CAgI4PLly7z88ss0b
NiQDRs28Pzzz9OiRQu2bdvGjBkz+PTTT61dfcuWLcuMGTP4/PPP8fT0ZNSoUaxfv561a9fy5JNP0rdvX9avX8/69evx8/PDw8OD/fv34+PjQ1pamkJKRKSYOXxQHT58GC8vL+visMnJyQwaNIj27dvj4/PHmnp+fn4AVK5cmaioKBISEjAYDAV6Uh05coRz584xaNAgAC5dusTRo0dJTU2lS5cuAPlWXg8ODiYxMZG77rqLTp06FelYRUSkIIe/62/v3r3WU3lwNZC8vb2pUKECZrPZup3ReHUo06dPp3PnzkRERNCiRQtrJ99rXX2rV69OtWrVmDt3LjExMfTr148mTZpQt25dtm/fDsDPP/9sPW5gYCDr169nzZo1CioRETtw+BlVu3btSE1NJTg4mLJly2KxWAgJCcHV1ZWpU6fi7++fb/vAwEAmTpzIrFmzqFatGunp6cAfXX3ff/99BgwYgMlkIi8vj7vvvptnn32WF198kZCQEFavXk2VKlWsnXzLlCnDgw8+yLlz56hQoUKxj19EpLRz+KACGDJkCEOGDCnw/GOPPQZg7dwL0KFDBzp06FBg2+u7+vr7+9O5c+d8r2/evJlhw4YREBDAhg0bOH36tPW13NxcgoODbTEUERH5m5wiqIpD9erVGTNmDC4uLpjNZsaOHQvAwIEDqVKlCq1atbJzhSIipZOC6v/5+/uzePHiAs/PnTvXDtWIiMg1Ciobs5gtWjmcq20+jK5qfigit8/h7/pzNlnq8AugkBIRm1FQiYiIQ1NQ2VhJ7/BryTXf8nUREVvTNSobMxgN/Db9R3uXUWSqvqK7H0WkeGlGJSIiDk1BJSIiDk1BJSIiDs3pg2rTpk20atUKk8mEyWSiR48exMTE3NYx09LS6NGjh40qFBGR21EibqZo2bIlkZGRAGRnZxMYGEjnzp2544477FyZiIjcrhIRVNfLyMjAaDQyYMAAIiIi8Pf3JzY2ljNnztC6dWvee+89ANLT07l8+TLvvvtugecWLFhgPd7mzZuJjIzExcWFGjVq8Pbbb+Pm5maXsYmIlEYlIqg2btyIyWTCYDDg5uZGWFgYs2fPLrBds2bNiImJ4fz58wwePJgpU6bQqFGjAs9dY7FYCAsL47PPPuPOO+/k/fffZ+nSpTotKCJSjEpEUF1/6u+a64PqWvNEuNrR9z//+Q/Dhg2jUaNGN3wuLS0NgHPnznHq1CmGDx8OQGZmJo888khRD0dERK5TIoLqRtzd3Tl9+jT+/v7s2bOHqlWrkp2dzbBhw+jbty8PP/wwwA2fu6ZixYr4+vry0Ucf4e3tzbfffkvZsmXtMRwRkVKrxAZV//79efvtt6lWrRpVqlQBYMGCBezevZvc3FxiY2MBaNOmTYHnRo8eDVxtbz927FgGDRqExWKhXLlyTJ061T4DEhEppQyW68+LyW1LSUmh0tfn7V1GkXH0JZRSUlL+cr1CR6b67cvZ6wfnHcOt6nb631GJiEjJpqASERGHVmKvUdmLxWxx+NNjt8OSa8bgqv+/EZHio39xbKykd/hVSIlIcdPNFDb2888/U6aMczdPFBEpbllZWTRt2vSGrymoRETEoek8joiIODQFlYiIODQFlYiIODQFlYiIODQFlYiIODQFlYiIODStTGEjZrOZ8PBw9u7di7u7OxMmTKBWrVr2LuuG/vWvf+Ht7Q1A9erVGTx4MKNHj8ZgMHDvvffy5ptvYjQaiY+PJy4uDldXV4YMGcLjjz9u17p37NjBtGnTiImJ4fDhw4WuOTMzk1GjRnH27FnKlSvHlClTqFSpkl3r3717N4MHD+aee+4BoHfv3gQFBTlk/Tk5OYwZM4Zjx46RnZ3NkCFDqFOnjlN9/jcag6+vr9N8B3l5eYwbN46DBw/i4uLCpEmTsFgsTvUd3BaL2MRXX31lCQ0NtVgsFsv27dstgwcPtnNFN5aZmWnp3Llzvudeeukly8aNGy0Wi8USFhZm+frrry2nTp2yd
OjQwZKVlWW5ePGi9c/2Eh0dbenQoYMlODj4b9c8d+5cywcffGCxWCyWFStWWMaPH2/3+uPj4y1z5szJt42j1p+QkGCZMGGCxWKxWM6dO2d59NFHne7zv9EYnOk7WLNmjWX06NEWi8Vi2bhxo2Xw4MFO9x3cDp36s5GtW7fSpk0bAJo2bcquXbvsXNGN/fLLL1y5coWBAwfSv39/fv75Z3bv3s1DDz0EQNu2bdmwYQM7d+6kWbNmuLu74+3tTc2aNfnll1/sVnfNmjWZMWOG9fHfqfn676Zt27b8+OOPdq9/165d/O9//6Nv376MGTOGjIwMh60/MDCQV155xfrYxcXF6T7/G43Bmb6Dp556ivHjxwNw/PhxfHx8nO47uB0KKhvJyMjAy8vL+tjFxYXc3Fw7VnRjHh4evPDCC8yZM4e33nqLkSNHYrFYMBgMAJQrV47ff/+djIwM6+nBa89nZGTYq2yeeeYZXF3/OFP9d2q+/vlr2xa3P9cfEBBASEgIixYtokaNGsycOdNh6y9XrhxeXl5kZGQwbNgwhg8f7nSf/43G4EzfAYCrqyuhoaGMHz+eZ555xum+g9uhoLIRLy8vLl26ZH1sNpvz/cPkKPz8/OjUqRMGgwE/Pz8qVKjA2bNnra9funSJO+64o8B4Ll26lO8vgL0ZjX/8p/tXNV///LVt7e3pp5+mcePG1j/v2bPHoes/ceIE/fv3p3PnznTs2NEpP/8/j8HZvgOAKVOm8NVXXxEWFkZW1h8LYDvLd/BPKahs5P7772fdunXA1YVp69ata+eKbiwhIYHJkycD8Ntvv5GRkcEjjzzCpk2bAFi3bh3NmzcnICCArVu3kpWVxe+//05qaqpDjalhw4aFrvn+++9n7dq11m0feOABe5YOwAsvvMDOnTsB+PHHH2nUqJHD1n/mzBkGDhzIqFGj6N69O+B8n/+NxuBM38EXX3zBrFmzAPD09MRgMNC4cWOn+g5uhxaltZFrd/3t27cPi8XCO++8g7+/v73LKiA7O5vXX3+d48ePYzAYGDlyJBUrViQsLIycnBxq167NhAkTcHFxIT4+nsWLF2OxWHjppZd45pln7Fp7WloaI0aMID4+noMHDxa65itXrhAaGsrp06dxc3Pj3XffpXLlynatf/fu3YwfPx43Nzd8fHwYP348Xl5eDln/hAkTWL16NbVr17Y+N3bsWCZMmOA0n/+NxjB8+HAiIiKc4ju4fPkyr7/+OmfOnCE3N5cXX3wRf39/p/s78E8pqERExKHp1J+IiDg0BZWIiDg0BZWIiDg0BZWIiDg0BZWIiDg0BZWIWJ0/f57ly5fbuwyRfBRUImK1d+9ekpKS7F2GSD6Ot8aPiBRKZmam9cfb19pYLF68mKNHj5KXl8fzzz9PUFAQJpOJ8PBw/P39iY2N5cyZM3Tp0oXXXnsNX19fjh49yn333cdbb73Fxx9/zC+//MLixYvp2bOnvYcoAiioRJxWXFwcd999N5GRkezbt49vvvmGihUrEhERQUZGBl27dqVly5Y33f/QoUPMmTMHT09PnnrqKU6fPs3gwYOJi4tTSIlD0ak/ESd14MABmjZtCkDdunU5ffo0Dz74IHB1kWR/f3+OHj2ab5/rF6KpWbMmXl5euLi4ULly5XyLnIo4EgWViJPy9/cnOTkZgKNHj7Jy5Up++ukn4GrbmX379lG9enXc3d05ffo0AHv27LHuf61FxPWMRiNms7kYqhcpPAWViJPq1asXaWlp9OvXj5CQEGbPns358+fp3bs3/fv3Z+jQodx5553079+ft99+mxdeeIG8vLxbHrNmzZrs27ePefPmFc8gRApBi9KKiIhD04xKREQcmoJKREQcmoJKREQcmoJKREQcmoJKREQcmoJKREQcmoJKREQc2v8ByIQsg2n+7CQAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ax = sns.countplot(y='Genre',\n", + " data=data, \n", + " order=data.Genre.value_counts().index)\n", + "ax.set_title('Top Game Genre')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Resources:\n", + "- https://seaborn.pydata.org/generated/seaborn.countplot.html\n", + "- https://github.com/mwaskom/seaborn/issues/1029#issuecomment-342365439" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Insight:\n", + "Action is the most popular genre followed by Sports" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Task 2. Which year had the most game releases?" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Game releases by year')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10,10))\n", + "sns.countplot(y='Year', data=data).set_title('Game releases by year')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Insight:\n", + "- Most games were released between 2008 and 2011\n", + "- Some areas to explore would be to look at whether this trend of releasing fewer games is due to the fact that there are more in-game purchases or due to continuous development trends." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Task 3. What are the top game genres produced for the five years with maximum game production?" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2009.0 1431\n", + "2008.0 1428\n", + "2010.0 1259\n", + "2007.0 1202\n", + "2011.0 1139\n", + "Name: Year, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# lets get the five years with max game production\n", + "max_gp = data.Year.value_counts().nlargest(5)\n", + "max_gp" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "## lets try using pandas and matplotlib\n", + "(data\n", + " .loc[data.Year.isin(max_gp.index),['Year','Genre']]\n", + " .groupby(['Year','Genre'])\n", + " .Genre\n", + " .count()\n", + " .unstack()\n", + " .plot.bar(figsize=(30,10), title='Distribution of genres for five max production years')\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Distribution of genres for five max production years')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# with seaborn this type of plot is very easy to make\n", + "plt.figure(figsize=(30,10))\n", + "sns.countplot(x='Year',\n", + " data=data,\n", + " order=data.Year.value_counts().nlargest(5).index.sort_values(),\n", + " hue='Genre').set_title('Distribution of genres for five max production years')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Insight:\n", + "Nothing interesting in particular, except that Action is the dominant genre" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Task 4. Which genre has sold the most games per year?\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearGenreGlobal_Sales
01980.0Shooter7.07
11981.0Action14.84
21982.0Puzzle10.03
31983.0Platform6.93
41984.0Shooter31.10
\n", + "
" + ], + "text/plain": [ + " Year Genre Global_Sales\n", + "0 1980.0 Shooter 7.07\n", + "1 1981.0 Action 14.84\n", + "2 1982.0 Puzzle 10.03\n", + "3 1983.0 Platform 6.93\n", + "4 1984.0 Shooter 31.10" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## lets find the global sales for each genre per year\n", + "sales_by_year = (data\n", + " .groupby(by=['Year','Genre'])\n", + " .Global_Sales\n", + " .sum()\n", + " .reset_index())\n", + "\n", + "## Lets find the max global sales per year\n", + "sales_by_year['Max_Global_Sales'] = (sales_by_year\n", + " .groupby(['Year'])\n", + " .Global_Sales\n", + " .transform(max)\n", + " )\n", + "\n", + "## Lets filter out the most selling genre per year\n", + "max_sales_by_year = (sales_by_year\n", + " .loc[sales_by_year.Global_Sales == sales_by_year.Max_Global_Sales]\n", + " .drop(columns=['Max_Global_Sales'])\n", + " .reset_index(drop=True)\n", + ")\n", + "\n", + "max_sales_by_year.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(30,10))\n", + "ax = sns.barplot(x='Year',\n", + " y='Global_Sales',\n", + " hue='Genre',\n", + " palette=\"Set2\",\n", + " dodge=False,\n", + " data=max_sales_by_year)\n", + "ax.set_title('Highest selling genres by year, globally')\n", + "## adding annotaions on bars\n", + "for index in range(0,max_sales_by_year.shape[0]):\n", + " ax.text(index, max_sales_by_year.Global_Sales[index]+1,\n", + " str(max_sales_by_year.Genre[index] + '---' + \n", + " str(round(max_sales_by_year.Global_Sales[index],2))),\n", + " rotation=90)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Resources:\n", + "- https://stackoverflow.com/a/59683826/8210613\n", + "- https://python-graph-gallery.com/46-add-text-annotation-on-scatterplot/\n", + "- https://thepythonguru.com/python-string-formatting/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Insight:\n", + "Action has dominated the market in the last 15 years or so." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Task 5. Which platform has the highest sales globally?" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Text(0.5, 1.0, 'Global Sales by Platform')]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "df_plat_sales = (data\n", + " .groupby('Platform')\n", + " .Global_Sales.sum()\n", + " .sort_values(ascending=False)\n", + " .reset_index()\n", + ")\n", + "plt.figure(figsize=(15,10))\n", + "sns.barplot(x='Global_Sales',\n", + " y='Platform',\n", + " data=df_plat_sales).set(title='Global Sales by Platform')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Task 6. Which individual game has the highest sales globally?" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameGlobal_Sales
0Wii Sports82.74
1Super Mario Bros.40.24
2Mario Kart Wii35.82
3Wii Sports Resort33.00
4Pokemon Red/Pokemon Blue31.37
5Tetris30.26
6New Super Mario Bros.30.01
7Wii Play29.02
8New Super Mario Bros. Wii28.62
9Duck Hunt28.31
10Nintendogs24.76
11Mario Kart DS23.42
12Pokemon Gold/Pokemon Silver23.10
13Wii Fit22.72
14Wii Fit Plus22.00
15Kinect Adventures!21.82
16Grand Theft Auto V21.40
17Grand Theft Auto: San Andreas20.81
18Super Mario World20.61
19Brain Age: Train Your Brain in Minutes a Day20.22
\n", + "
" + ], + "text/plain": [ + " Name Global_Sales\n", + "0 Wii Sports 82.74\n", + "1 Super Mario Bros. 40.24\n", + "2 Mario Kart Wii 35.82\n", + "3 Wii Sports Resort 33.00\n", + "4 Pokemon Red/Pokemon Blue 31.37\n", + "5 Tetris 30.26\n", + "6 New Super Mario Bros. 30.01\n", + "7 Wii Play 29.02\n", + "8 New Super Mario Bros. Wii 28.62\n", + "9 Duck Hunt 28.31\n", + "10 Nintendogs 24.76\n", + "11 Mario Kart DS 23.42\n", + "12 Pokemon Gold/Pokemon Silver 23.10\n", + "13 Wii Fit 22.72\n", + "14 Wii Fit Plus 22.00\n", + "15 Kinect Adventures! 21.82\n", + "16 Grand Theft Auto V 21.40\n", + "17 Grand Theft Auto: San Andreas 20.81\n", + "18 Super Mario World 20.61\n", + "19 Brain Age: Train Your Brain in Minutes a Day 20.22" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## it seems like the data set is already sorted by Global_Sales\n", + "data.loc[:19,['Name','Global_Sales']]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "data.Global_Sales.head(20).plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Need for Speed: Most Wanted 12\n", + "Madden NFL 07 9\n", + "LEGO Marvel Super Heroes 9\n", + "FIFA 14 9\n", + "Ratatouille 9\n", + " ..\n", + "Haze 1\n", + "Digging for Dinosaurs 1\n", + "Age of Empires III: Gold Edition 1\n", + "Classic Action: Devilish 1\n", + "Spore Hero 1\n", + "Name: Name, Length: 11493, dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# however the games seem to repeat, maybe due to be re-released on different plaform or regions?\n", + "data.Name.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Text(0.5, 1.0, 'Top 20 selling games globally')]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# so lets group them together and calculate the total global_sales before plotting\n", + "df_name_sales = (data\n", + " .groupby('Name')\n", + " .Global_Sales.sum()\n", + " .sort_values(ascending=False)\n", + " .reset_index()\n", + " .head(20)\n", + ")\n", + "plt.figure(figsize=(15,10))\n", + "sns.barplot(x='Global_Sales',\n", + " y='Name',\n", + " data=df_name_sales).set(title='Top 20 selling games globally')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Insight:\n", + "Wii Sports is by far the most top selling game in the world, followed by GTA V and Super Mario Bros." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Task 7. Find the total revenue by region." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "(data\n", + " .loc[:,['NA_Sales','EU_Sales','JP_Sales','Other_Sales']]\n", + " .sum()\n", + " .reset_index()\n", + " .rename(columns={'index':'region',0:'sales'})\n", + " .set_index('region')\n", + " .plot.pie(y='sales', startangle=270,figsize=(10,8),\n", + " autopct='%.1f%%', title=\"Sales by region\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Task 8. What other questions you would want to get answered?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "8.1. Distribution of genres for top five producing years" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Distribution of genres for top five producing years')" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABaYAAAJZCAYAAABShOLQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdf5xVdb0v/tf8YAZjQEStblfxiIqghYgkx1CUPApZXNTUFMPjIfNEIqA3DBFBBX8kSiWK2snshqWH1NTTj8fJ8CQKyCnKUART8/qDy1VBvMwgDAOs7x9+nSOKiLDZA/h8/jVrr/V57/daew97eM1nPquiKIoiAAAAAABQJpUt3QAAAAAAAB8tgmkAAAAAAMpKMA0AAAAAQFkJpgEAAAAAKCvBNAAAAAAAZSWYBgAAAACgrATTAAAt6OWXX07Xrl0zcODADBw4MAMGDMjpp5+eX//6183HfP/738999923yTo33nhjfve732103zvHH3jggXn99dc/VI/z58/PuHHjkiRPPPFEhg8f/qHGb4l169Zl6NCh6devX+64445t/nyl1NDQkNNPPz1f/OIX89vf/naL64wdOzZPPvlkyfq6+eabc8wxx+Tiiy/OJZdcktmzZ5esdql96Utfyty5c7do7IwZMzJx4sQSdwQAQKlVt3QDAAAfda1bt87999/fvL148eKcffbZqaqqSr9+/TJixIgPrDF37tzsv//+G923OeM35dlnn80rr7ySJPnMZz6TG264YavqbY5XXnkljz76aB5//PFUVVVt8+crpYULF2bZsmV58MEHt6rO7Nmz85WvfKVEXSV33313rrvuuvTs2bNkNbdHxx57bI499tiWbgMAgA8gmAYA2M789//+3zN8+PDcdttt6devX0aPHp0DDjggX/va13LDDTfkwQcfTKtWrbLbbrvl6quvzoMPPpgnn3wy1157baqqqjJjxoy88cYbeemll3LMMcdk2bJlzeOT5Hvf+16eeOKJrF+/PiNHjkzfvn1z77335t///d9z6623Jknz9mWXXZYbbrgh9fX1ufjii3PiiSdmwoQJ+eUvf5n6+vpcfvnlWbRoUSoqKnLUUUflwgsvTHV1dT7zmc/k3HPPzaxZs/Lqq6/mnHPOyaBBg95zrn/84x9z7bXXZtWqVWnVqlVGjhyZHj165JxzzsnatWtz8sknZ8qUKenYsWPzmNdffz0XX3xxXnzxxbRv3z577rlnDjjggJx//vl57rnncuWVV+aNN97IunXrMnjw4JxyyimZO3duvvvd72bvvffOM888k7Vr1+byyy/PYYcdltGjR29wvUaMGJHrrrsuf/jDH7Ju3bocdNBBGTt2bOrq6vKzn/0sd911V1q1apXa2tpcccUVG/xC4G9/+1vGjBmTV155JQMHDsy//uu/5tFHH82NN96Y9evXp02bNrn44ovTrVu3TJkyJY8//nheffXVHHjggbnuuuua63z3u9/Nq6++mm9961u59tpr84lPfCKXXXZZFi9enKIocuKJJ+acc87Jyy+/nMGDB+eoo47KX/7ylxRFkXHjxr0nfB45cmReeeWVXHLJJRkxYkTuvPPOnHnmmXnqqaeycuXKXHrppUmShx9+ODfeeGN+/vOf509/+lOuu+66rFq1KpWVlRk2bFj69u37ntfwoIMOyte//vU88sgjefPNN3PhhRfm+OOPz7333pu77747q1atSl1dXaZNm5abbropv/rVr1JVVZV99903l156afbcc888++yzGTNmTFatWpVOnTrlzTffTPLWXxQMGDAgf/7znze6feutt+YXv/hFqqurs88+++Saa67Jgw8+2PxeHjx4cLp3754//elPWbJkSY444ohMmDAhlZWVuffee/ODH/wgrVu3zt///d/nJz/5SZ566qkNzu3mm2/Os88+m+uvv775/Tpx4sTcd99973t93nzzzVx22WV54YUX8sYbb6RNmza
57rrr0qlTpwwePDi77rpr/va3v+WMM87IJz7xidx8882pqKhIVVVVLrroonz2s5/dnH8mAAB2fAUAAC3mpZdeKrp37/6ex//6178WhxxySFEURfHtb3+7+OEPf1j8n//zf4oePXoUjY2NRVEUxW233VY8+OCDRVEUxVe/+tXiN7/5TfPx//iP/9hc6+3xRVEUnTt3Lm699daiKIri6aefLg4//PBi2bJlxT333FOce+65zWPeuf3Orx977LHii1/8YlEURXHRRRcVEyZMKNavX180NjYWQ4YMaa7duXPnYtq0aUVRFMUTTzxRfPrTny5Wr169wTm+/vrrxRFHHFE8/vjjzed8+OGHFy+++OL7XpeiKIoLLriguPbaa4uiKIpXXnml6N27d3HDDTcUTU1NxQknnFA8+eSTRVEUxYoVK4ovfOELxZ///OfiscceK7p27Vo89dRTzdfuzDPP3Oj1mjJlSnHNNdcU69evL4qiKK6//vpi/Pjxxdq1a4uDDz64eOWVV4qiKIpf/OIXxV133fWe/t55jZ599tnic5/7XPHiiy8WRVEUs2fPLnr37l3U19cXN9xwQ9GvX7+iqalpo+fZt2/fYv78+UVRFMWZZ55Z/OhHP2o+rwEDBhS//OUvi5deeqno3Llz8cADDxRFURS///3vi969exdr1qzZZL233y8vvvhi0atXr+b31IgRI4rp06cXb7zxRnH88ccXL730UlEURfF//+//Lfr06VMsXrz4PXU7d+5c3HzzzUVRFMXChQuLww47rPk99dnPfraor68viqIo7r777uIrX/lKsXLlyqIoiuKGG24ohgwZUhRFUQwcOLCYPn16URRF8cc//rE48MADi8cee+w974N3bv/ud78rjj/++OKNN94oiqIorrrqqmLq1KkbvF+/+tWvFsOHDy/WrVtX1NfXF0ceeWQxZ86c4plnnimOOOKIYsmSJc2veefOnd9zbkuXLi169OhRLF++vCiKohg1alRx5513bvL6/OY3vykmTJjQXOPSSy8trrjiiuZ+Lr744uZ9xx57bPHnP/+5KIqieOSRR4opU6a8pwcAgJ2VGdMAANuhioqKtG7deoPHPvGJT6RLly456aST0qdPn/Tp0ydHHHHERscfdthh71v7jDPOSJJ07tw5++23X/Ps0w9r5syZufPOO1NRUZGampqcfvrp+V//63/l3HPPTZLm5RQOPvjgrFmzJm+++WZqa2ubx8+fPz8dO3bMIYcckiQ54IAD0qNHj/znf/5nevXq9b7P+/DDD+cXv/hFkuTjH/94+vfvnyT53//7f+fFF1/MmDFjmo9dvXp1nnrqqey333751Kc+la5duyZ5a5bv2zWSDa/X73//+9TX1zevwdzU1JTdd989VVVV6d+/f04//fQcc8wxOfLII3P00Udv8ho99thj+fu///vsvffeSZIjjjgiHTp0aF47unv37qmu3vSP5G+++Wb+9Kc/5Uc/+lGSpG3btjn55JMzc+bMHHLIIdl1110zYMCAJMnRRx+dqqqqPP300/n0pz+9ybpJsvfee+fAAw/MQw89lCOOOCKPPfZYrrzyyvzxj3/Ma6+9lvPOO6/52IqKijz99NP51Kc+9Z46X/3qV5MkXbp0SefOnfOHP/whyVtrmtfV1SV56/1y8skn52Mf+1iS5Kyzzsott9ySpUuX5umnn86JJ56Y5K3X4oADDvjA3ufMmZP+/ftn1113TZJcfPHFSd6a7f9Offv2TWVlZerq6rLPPvvk//2//5dFixald+/e+eQnP9nc/5QpU97zHLvvvnuOOeaY3H///TnxxBPz6KOPZvz48Zu8Pv3798/ee++dadOm5YUXXsh//ud/5tBDD20+7p2z2b/4xS9m2LBhOfroo9O7d+98/etf/8DzBgDYWQimAQC2Q0888UQ6d+68wWOVlZW544478sQTT2TOnDm56qqrctRRR+Wiiy56z/i3w7+Nqaz8r/tfr1+/PtXV1am
oqEhRFM2PNzU1fWCP69evT0VFxQbba9eubd5+O4R++5h31k/eusHhO8e/fcw7a2xMdXX1BrXePp9169albdu2G6zXvXTp0rRt2zaPP/74BkH/u8/3nddr/fr1GTNmTHPovHLlyjQ2NiZJrrvuuvz1r3/N7Nmz84Mf/CD3339/vv/9779vr+++Ru8+x029Tu+s8e5r985r/e41uNevX/+h1uU+7bTTct9992XZsmX5h3/4h7Rp0ybr1q3Lfvvtl5///OfNx73yyivp0KHDRmu88/ne+fzvvq6ber+88xzfDus39b6sqqraoN6KFSuyYsWK9/S2sde9qqpqg7qbul5nnnlmLrvsslRXV+f444//wOvzs5/9LNOnT8+ZZ56ZAQMGpH379nn55Zebj3vnNbngggvy5S9/ObNmzcq9996bH/3oR7n77rvftxcAgJ1J5QcfAgBAOT3//POZOnVqhgwZssHjixYtype+9KXst99++ed//uecffbZeeKJJ5K8Fax9UKD7trdnCi9YsCAvvvhiDjnkkHTo0CHPPPNMGhsb09TUlH//939vPv79ah955JG54447UhRF1qxZk+nTp+dzn/vcZp9n9+7d87e//S3z589PkjzzzDP5wx/+kMMPP3yT444++ujm8G758uX53e9+l4qKiuy7774b3EhyyZIl+dKXvtQ8O3lzHXnkkfnpT3+aNWvWZP369bn00kszefLkvP766zn66KPTvn37nH322Rk5cmTz9X8/RxxxRB599NG89NJLSd6a5btkyZLmWeKb8vZ1r6uryyGHHJKf/vSnSZL6+vrcd999zdf69ddfz8yZM5MkDz30UFq1avWeX2psynHHHZcFCxZk+vTpOe2005K89dq88MILzTOfFy5cmH79+jXfBPPd7rvvviRvvaeef/75ja6TfNRRR+Wee+5pXj962rRp+exnP5s99tgjBx98cHPIu2DBgvz1r39NkrRr1y5NTU159tlnkyS/+tWvmut97nOfy4MPPpiGhoYkyZQpU/LjH/94s875yCOPzJw5c5rP550B87v16NEjlZWVue2223L66ad/4PV59NFHc9JJJ+XUU0/Nvvvum4ceeijr1q17T921a9fm85//fFatWpUzzjgj48ePz9NPP501a9Zs1jkAAOzozJgGAGhhq1evzsCBA5O8Nfu3trY2F154YY455pgNjuvSpUu+8IUv5Mtf/nI+9rGPpXXr1hk7dmyS5POf/3wmT568WTOdX3rppZx44ompqKjI5MmT0759+/Tu3Tuf/exn84UvfCF77rlnevXqlaeffjrJWyHcTTfdlGHDhmXw4MHNdcaOHZuJEydmwIABaWpqylFHHZVvfOMbm33eHTp0yPe///1MmDAhq1evTkVFRa6++ursu+++G8wwfbeLL744Y8eObZ6N+qlPfSqtW7dOTU1Npk6dmiuvvDI//OEPs3bt2owYMSKHHXZY5s6du9l9ffOb38x3vvOdnHTSSVm3bl26du2a0aNHp66uLkOHDs3ZZ5+d1q1bp6qqKhMnTtxkrf333z/jx4/PsGHDsm7durRu3Tq33HJL2rZt+4F9HHfccRk1alQuu+yyXHfddbniiity7733Zs2aNRkwYEBOPvnkLF68OLW1tbn//vtz3XXXpXXr1rnppps+1IzpmpqanHDCCZk9e3a6deuW5K3X5oYbbsi1116bxsbGFEWRa6+9NnvttddGa/zpT3/K9OnTs379+nz3u99tXl7jnU455ZQsWbIkp556atavX5999tmn+YaPkydPzsUXX5y77rorHTt2TKdOnZK8tWzJqFGj8vWvfz0dOnRoXrYleesXFM8++2zz0jT7779/JkyYkN/+9rcfeM777rtvLr744nzta19LTU1Nunbtml122eV9jz/55JPz61//Ol26dPnA6zNkyJCMGzeu+Zcn3bt3bw7a36m6ujpjxozJt771rea/WrjqqqtSU1Pzgf0DAOwMKop3/10gAAB
sx37605/moIMOyqGHHpo1a9Zk0KBBOf/88z9wveed0csvv5wBAwZs8TrhpXDggQdmzpw577vMx/bopZdeyv33359vfvObqayszG9/+9v8y7/8y0ZnTq9duzbDhg3L//gf/yMnnHBCC3QLALBzMmMaAIAdytszY9evX5+mpqb079//IxlKs+U++clP5tVXX82AAQNSVVWVtm3b5qqrrnrPcW/PyP6Hf/iHDWZrAwCw9cyYBgAAAACgrNz8EAAAAACAshJMAwAAAABQVoJpAAAAAADKaoe7+eHjjz+e2tralm4DAAAAAIBNaGxsTPfu3Te6b4cLpmtra9O1a9eWbgMAAAAAgE1YuHDh++6zlAcAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlNUOt8Y0AAAAAMD2oKmpKS+//HJWr17d0q20qNatW2evvfZKq1atNnuMYBoAAAAAYAu8/PLLadu2bf7u7/4uFRUVLd1OiyiKIsuWLcvLL7+cfffdd7PHWcoDAAAAAGALrF69OrvvvvtHNpROkoqKiuy+++4feta4YBoAAAAAYAt9lEPpt23JNRBMAwAAAABsh4YPH54f/OAHzdsrV65Mv379smjRohbsqjQE0wAAAAAA26HLLrssd955Z5599tkkyXe+85185StfSZcuXVq4s63n5ocAAAAAANuhDh065NJLL83YsWNz4YUX5qWXXsrQoUNzzjnnpLGxMbW1tZkwYUL+23/7b7n++uvz5JNPZuXKldlvv/1y9dVXZ8qUKfnzn/+cN998M1deeWX222+/lj6lZoJpAAAAAIDt1Oc///k8+OCDGT16dO68885cffXVGTx4cI4++ujMmTMn1113XS6//PK0a9cut99+e9avX58vfvGLeeWVV5IknTp1ytixY1v4LN5LMA0AAAAAsB078cQTs3r16nziE5/IX//619x666354Q9/mKIo0qpVq9TW1ub111/PhRdemI997GN5880309TUlCTZd999W7j7jRNMAwAAAADsIDp16pQhQ4akR48eee655/KHP/whM2fOzJIlS/K9730vr7/+eh588MEURZEkqazcPm8zKJgGAAAAANhBfPvb385ll12WxsbGrF69Opdcckn22muvTJ06Naeddlpqamqy995759VXX23pVjepong7Ot9BLFy4MF27dm3pNgAAAACAjzhZ5X/Z2LXY1PXZPudxAwAAAACw0xJMAwAAAABQVoJpAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQAAAAAAGVV3dINAAAAAADsDBqb1qW2VVXZ6jU1NWXMmDFZvHhx1qxZk6FDh2b//ffP6NGjU1FRkQMOOCDjx49PZWVlpk+fnrvuuivV1dUZOnRo+vbtmx/84Ad55JFHkiQrVqzI0qVLM2vWrA2eY2PjSkEwDQAAAABQArWtqnLYqJ+UrN68SWdtcv8DDzyQ9u3bZ9KkSVm+fHlOOumkdOnSJSNHjkyvXr0ybty4zJgxI927d8+0adNyzz33pLGxMYMGDUrv3r1z7rnn5txzz02S/PM//3O+9a1vbVD/tdde2+i4mpqarT43wTQAAAAAwA6of//+6devX/N2VVVVFixYkMMPPzxJ0qdPn8yaNSuVlZU59NBDU1NTk5qamnTs2DGLFi1Kt27dkiS//e1v065duxx11FEb1J8/f/4mx20Na0wDAADwHo1rG7erOgDAe7Vp0yZ1dXVpaGjI8OHDM3LkyBRFkYqKiub99fX1aWhoSNu2bTcY19DQ0Lx96623ZtiwYe+p/0HjtoYZ0wAAALxHbXVtek/pvdV1Zp0/64MPAgC22JIlS3Leeedl0KBBGTBgQCZNmtS8b+XKlWnXrl3q6uqycuXKDR5/O3B+9tln065du+yzzz7vqb2pcVvLjGkAAAAAgB3Q0qVLM2TIkIwaNSqnnHJKkuSggw7K3LlzkyQzZ85Mz549061bt8ybNy+NjY2pr6/Pc889l86dOydJZs+enT5
9+my0/qbGbS0zpgEAAAAAdkC33HJLVqxYkalTp2bq1KlJkksuuSQTJ07M5MmT06lTp/Tr1y9VVVUZPHhwBg0alKIocsEFF6S2tjZJ8vzzz6d37w3/Sur2229Px44dc+yxx77vuK1VURRFUZJKZbJw4cJ07dq1pdsAAADY6VnKAwA27d1ZZWPTutS2qipZ/VLX25Y2lttuKsu1lAcAAAAAQAmUOkTeUULpLSGYBgAAAACgrATTAAAAAACUlWAaAAAAAICyEkwDAAAAAFBWgmkAAAAAAMqquqUbAAAAAADYGRRrG1NRXVu2ek1NTRkzZkwWL16cNWvWZOjQodl///0zevToVFRU5IADDsj48eNTWVmZ6dOn56677kp1dXWGDh2avn37pr6+PhdccEFWrVqVVq1aZdKkSdlzzz03eI6NjSsFwTQAAAAAQAlUVNfmxSs+U7J6Hcc9scn9DzzwQNq3b59JkyZl+fLlOemkk9KlS5eMHDkyvXr1yrhx4zJjxox0794906ZNyz333JPGxsYMGjQovXv3zr333pvOnTvnoosuyvTp03Pbbbdl9OjRzfVfe+21jY6rqanZ6nMTTAMAAAAA7ID69++ffv36NW9XVVVlwYIFOfzww5Mkffr0yaxZs1JZWZlDDz00NTU1qampSceOHbNo0aJ07tw5f/vb35IkDQ0Nqa7eMC6eP3/+Rsd169Ztq3u3xjQAAAAAwA6oTZs2qaurS0NDQ4YPH56RI0emKIpUVFQ076+vr09DQ0Patm27wbiGhobstttumTVrVk444YTcdtttOeWUUzao/37jSkEwDQAAAACwg1qyZEnOOuusDBw4MAMGDEhl5X9FvitXrky7du1SV1eXlStXbvB427Ztc+ONN+acc87Jr3/969x22205//zzN6j9fuNKQTANAAAAALADWrp0aYYMGZJRo0Y1z3Y+6KCDMnfu3CTJzJkz07Nnz3Tr1i3z5s1LY2Nj6uvr89xzz6Vz585p165dc9C8++67bxBCJ3nfcaVgjWkAAAAAgB3QLbfckhUrVmTq1KmZOnVqkuSSSy7JxIkTM3ny5HTq1Cn9+vVLVVVVBg8enEGDBqUoilxwwQWpra3NiBEjMnbs2PzsZz/L2rVrM2HChCTJ7bffno4dO+bYY4/d6LhSqCiKoihJpTJZuHBhunbt2tJtAAAA7PR6T+m91TVmnT+rBJ0AwPbp3VllsbYxFdWlCW63Rb1taWO57aayXEt5AAAAAACUQKlD5B0llN4SgmkAAAAAAMpKMA0AAAAAQFkJpgEAAAAAKCvBNAAAAAAAZSWYBgAAAACgrKpbugEAAAAAgJ1B49rG1FbXlq1eU1NTxowZk8WLF2fNmjUZOnRo9t9//4wePToVFRU54IADMn78+FRWVmb69Om56667Ul1dnaFDh6Zv37554403MmrUqDQ0NKR9+/aZOHFidt999w2eY2PjSkEwDQAAAABQArXVtek9pXfJ6s06f9Ym9z/wwANp3759Jk2alOXLl+ekk05Kly5dMnLkyPTq1Svjxo3LjBkz0r1790ybNi333HNPGhsbM2jQoPTu3Tu33nprDjvssHzjG9/I7NmzM3ny5Fx55ZXN9V977bWNjqupqdnqc7OUBwAAAADADqh///4ZMWJE83ZVVVUWLFiQww8/PEnSp0+fzJ49O/Pnz8+hhx6ampqatG3bNh07dsyiRYvy7LPPpk+fPkmSHj16ZN68eRvUf79xpSCYBgAAAADYAbVp0yZ1dXVpaGjI8OHDM3LkyBRFkYqKiub99fX1aWhoSNu2bTcY19DQkK5du+ahhx5Kkjz00ENZvXr1BvXfb1wpCKYBAAAAAHZQS5YsyVlnnZWBAwdmwIABqaz8r8h35cqVadeuXerq6rJy5coNHm/btm3OPffcLF68OGeffXaWLFmST37ykxvUfr9xpSCYBgAAAADYAS1dujRDhgzJqFGjcsoppyRJDjrooMydOzdJMnPmzPTs2TP
dunXLvHnz0tjYmPr6+jz33HPp3Llz/vjHP2bgwIH58Y9/nL322is9evTYoP77jSsFNz8EAAAAANgB3XLLLVmxYkWmTp2aqVOnJkkuueSSTJw4MZMnT06nTp3Sr1+/VFVVZfDgwRk0aFCKosgFF1yQ2tra7Lvvvvn2t7+dJPn4xz+eq666Kkly++23p2PHjjn22GM3Oq4UKoqiKEpSqUwWLlyYrl27tnQbAAAAO73eU3pvdY1Z588qQScAsH16d1bZuLYxtdWlCW63Rb1taWO57aayXEt5AAAAAACUQKlD5B0llN4SgmkAAAAAAMpKMA0AAAAAQFkJpgEAAAAAKCvBNAAAAAAAZSWYBgAAAACgrKpLXbCpqSljxozJ4sWLs2bNmgwdOjSf/OQn841vfCN/93d/lyQ544wzcsIJJ2T69Om56667Ul1dnaFDh6Zv376lbgcAAAAAoCzWNzamsra2bPU2lsXuv//+GT16dCoqKnLAAQdk/Pjxqax8a37y66+/ntNPPz3/9m//ltra2qxevTqjRo3KsmXL0qZNm3znO99Jhw4dNniObZXhljyYfuCBB9K+fftMmjQpy5cvz0knnZTzzjsv//RP/5QhQ4Y0H/faa69l2rRpueeee9LY2JhBgwald+/eqampKXVLAAAAAADbXGVtbR7uc3TJ6h098+FN7t9YFtulS5eMHDkyvXr1yrhx4zJjxowcd9xxeeSRR3L99ddn6dKlzePvvPPOdO7cOeeff35+9atfZerUqRk7dmzz/m2Z4ZZ8KY/+/ftnxIgRzdtVVVV58skn8/vf/z5nnnlmxowZk4aGhsyfPz+HHnpoampq0rZt23Ts2DGLFi0qdTsAAAAAADuljWWxCxYsyOGHH54k6dOnT2bPnp0kqayszO2335727ds3Hz9v3rwcddRRzcfOmTNng/rbMsMt+YzpNm3aJEkaGhoyfPjwjBw5MmvWrMmpp56aT3/607n55ptz0003pUuXLmnbtu0G4xoaGj6wfmNjYxYuXFjqtgEAAHiHrl27lqyW/8MBsLNqamrKqlWrmrd32WWXkj/HO+u/29tLdCxdujQjRozIN7/5zXz3u9/N6tWrkyStWrXKG2+8kf/beBwAACAASURBVFWrVqVHjx5JkvXr12fVqlVZv359VqxYkVatWmXVqlWprKzMihUrNni+5cuXZ5dddml+rHXr1lm2bNlGe2pqavpQn/klD6aTZMmSJTnvvPMyaNCgDBgwICtWrEi7du2SJMcdd1wmTJiQnj17ZuXKlc1jVq5cuUFQ/X5qa2tL+gMSAAAA25b/wwGws1q4cOE2CaPf6YPqvzOL/fKXv5zvf//7zWOampqy2267bVCjsrIyu+yyS2pra9OuXbusW7cuu+yyS+rr67PrrrtucOxuu+2WxsbG5sdWr16dPfbYY6M9tWrV6j2f+ZsKqku+lMfSpUszZMiQjBo1KqecckqS5Gtf+1rmz5+fJJkzZ04OPvjgdOvWLfPmzUtjY2Pq6+vz3HPPpXPnzqVuBwAAAABgp7SxLPaggw7K3LlzkyQzZ85Mz54933d8jx498vDDDzcfe9hhh22wf1tmuCWfMX3LLbdkxYoVmTp1aqZOnZokGT16dK666qq0atUqe+yxRyZMmJC6uroMHjw4gwYNSlEUueCCC1JbwjtWAgAAAADszDaWxV5yySWZOHFiJk+enE6dOqVfv37vO/6MM87It7/97Zxxxhlp1apVrr/++iTJ7bffno4dO+bYY4/dZhluRVEURUkqlcnChQv9GRgAAEAZ9J7Se6trzDp/Vgk6AYDt07uzyvWNjaks4eTbUtfbljaW224qyy35Uh4AAAAAAB9FpQ6Rd5RQeksIpgEAAAAAKCvBNAAAAAAAZSWYBgAAAACgrATTAAAAAACUlWAaAAAAAICyEkwDAAAAAJTA2qZ1Za3X1NSUUaNGZdCgQTnllFMyY8aMvPDCCznjjDMyaNCgjB8/PuvXr28+/vX
XX8/xxx+fxsbG5sdeeOGFfOlLX9po/ccffzynnnpqTj/99Nx4442lOan/X3VJqwEAAAAAfERVt6rKjf/z30pWb9j1Aza5/4EHHkj79u0zadKkLF++PCeddFK6dOmSkSNHplevXhk3blxmzJiR4447Lo888kiuv/76LF26tHn8fffdl5/85CdZvnz5RuuPHz8+U6ZMyd57751zzz03CxYsyMEHH1ySczNjGgAAAABgB9S/f/+MGDGiebuqqioLFizI4YcfniTp06dPZs+enSSprKzM7bffnvbt2zcfv+uuu+aOO+7YaO2GhoasWbMmHTt2TEVFRY488sjMmTOnZL0LpgEAAAAAdkBt2rRJXV1dGhoaMnz48IwcOTJFUaSioqJ5f319fZKkd+/e2W233TYY37dv33zsYx/baO2GhobU1dVt8Fxv1yoFwTQAAAAAwA5qyZIlOeusszJw4MAMGDAglZX/FfmuXLky7dq126K6dXV1WblyZUlqbYxgGgAAAABgB7R06dIMGTIko0aNyimnnJIkOeiggzJ37twkycyZM9OzZ88tql1XV5dWrVrlxRdfTFEUefTRR7e41sa4+SEAAAAAwA7olltuyYoVKzJ16tRMnTo1SXLJJZdk4sSJmTx5cjp16pR+/fp9qJpz5szJvHnzMmzYsFx++eX51re+lXXr1uXII4/MIYccUrLeK4qiKEpWrQwWLlyYrl27tnQbAAAAO73eU3pvdY1Z588qQScAsH16d1a5tmldqltVlax+qettSxvLbTeV5VrKAwAAAACgBEodIu8oofSWEEwDAAAAAFBWgmkAAAB2CGub1m1XdQCALefmhwAAAOwQqltV5cb/+W9bXWfY9QNK0A0AvKUoilRUVLR0Gy1qS25jaMY0AAAAAMAWaN26dZYtW7ZFwezOoiiKLFu2LK1bt/5Q48yYBgAAAADYAnvttVdefvnlvPbaay3dSotq3bp19tprrw81RjANAAAAALAFWrVqlX333bel29ghWcoDAAAAAICyEkwDAAAAAFBWgmkAAAAAAMpKMA0AAAAAQFkJpgEAAAAAKCvBNAAAAAAAZSWYBgAAAACgrATTAAAAAACUlWAaAAAAAICyEkwDAAAAAFBWgmkAAAAAAMpKMA0AAAAAQFkJpgEAAAAAKCvBNAAAAAAAZSWYBgAAAACgrATTAAAAAACUlWAaAAAAAICyEkwDAAAAAFBWgmkAAAAAAMpKMA0AAAAAQFkJpgEAANhm1jc2tnQLAMB2qLqlGwAAAGDnVVlbm4f7HF2SWkfPfLgkdQCAlmfGNAAAAAAAZSWYBgAAAACgrATTAAAAAACUlWAaAAAAAICyEkwDAAAAAFBWgmkAAAAAAMpKMA0AAAAAQFkJpgEAAHYSxdrGlm4BAGCzVLd0AwAAAJRGRXVtXrziMyWp1XHcEyWpAwCwMWZMAwAAAABQVoJpAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlJVgGgAAAACAshJMAwAAAABQVoJpAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlJVgGoDtTrG2cbusBQAAAJRGdUs3AADvVlFdmxev+ExJanUc90RJ6gAAAAClY8Y0AABAC2psWtfSLQAAlJ0Z0wAAAC2otlVVDhv1k5LUmjfprJLUAQDY1syYBgAAAACgrATTAAAAAACUlWAaAAAAAICyEkwDAAAAAFBWgmkAAAAAAMpKMA0AAAAAQFkJpgHYqTWubdyu6gAAAABJdUs3AADbUm11bXpP6b3VdWadP6sE3QAAAACJGdMAAAAAAJSZYBoAAAAAgLISTAMAAAAAUFaCaQAAAAAAykowDQAAAABAWQmmAQAAAAAoK8E0AAAAAABlJZgGAAAAAKCsqktdsKmpKWPGjMnixYuzZs2aDB06NPvvv39Gjx6dioqKHHDAARk
/fnwqKyszffr03HXXXamurs7QoUPTt2/fUrcDAAAAAMB2puTB9AMPPJD27dtn0qRJWb58eU466aR06dIlI0eOTK9evTJu3LjMmDEj3bt3z7Rp03LPPfeksbExgwYNSu/evVNTU1PqlgBgq61vbExlbe12VwsAAAB2RCUPpvv3759+/fo1b1dVVWXBggU5/PDDkyR9+vTJrFmzUllZmUMPPTQ1NTWpqalJx44ds2jRonTr1q3ULQHAVqusrc3DfY4uSa2jZz5ckjoAAACwoyp5MN2mTZskSUNDQ4YPH56RI0fmO9/5TioqKpr319fXp6GhIW3btt1gXENDwwfWb2xszMKFC0vdNh9B++/bMa1at9nqOk2rV+bZ518sQUfA27p27drSLWxzPssAeNtH4XNve+SzGABaVsmD6SRZsmRJzjvvvAwaNCgDBgzIpEmTmvetXLky7dq1S11dXVauXLnB4+8Mqt9PbW2tH9womRev+MxW1+g47gnvSeBD8+8GALQsn8UAsO1t6hfBlaV+sqVLl2bIkCEZNWpUTjnllCTJQQcdlLlz5yZJZs6cmZ49e6Zbt26ZN29eGhsbU19fn+eeey6dO3cudTsAAAAAAGxnSj5j+pZbbsmKFSsyderUTJ06NUlyySWXZOLEiZk8eXI6deqUfv36paqqKoMHD86gQYNSFEUuuOCC1LoRFAAAAADATq/kwfTYsWMzduzY9zx+xx13vOex0047LaeddlqpWwAAAAAAYDtW8qU8AAAAAABgUwTTAAAAAACUlWAaAAAAAICyEkwDAAAAAFBWgmkAAAAAAMpKMA0AAAAAQFkJpgEAAAAAKCvBNAAAAAAAZSWYBgAAAACgrATTAAAAAACUlWAaAAAAAICyEkwDAAAAAFBWgmkAAAAAAMpKMA0AAAAAQFkJpgEAAAAAKCvBNAAAAAAAZSWYBgAAAACgrATTAAAAAACUlWAaAAAAAICyEkwDAAAAAFBWgmkAAAAAAMpKMA0AAAAAQFkJpmE7sr6xcbuqAwAAAADbQnVLNwD8l8ra2jzc5+itrnP0zIdL0A0AAAAAbBtmTAMAAAAAUFaCaQAAAAAAykowDVupca31nIEPZ23Tuu2qDgAAAJSbNaZhK9VW16b3lN4lqTXr/FklqQNs36pbVeXG//lvW11n2PUDStANAAAAlJ8Z0wAAAAAAlJVgGgAAAACAshJMAwAAAABQVoJpAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlJVgGgAAAACAshJMAwAAAABQVoJpAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlJVgGgAAAACAshJMAwAAAABQVoJpAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlJVgGgAAAACAshJMAwAAAABQVoJpAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQ7lMamdS3dAgAAAACwlapbugH4MGpbVeWwUT8pSa15k84qSR0AAAAA4MMxYxoAAACgDIq1jdtVHYCWZMY0AAAAQBlUVNfmxSs+s9V1Oo57ogTdALQsM6YBAAAAACgrwTQAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlJVgGgAAAACAshJMAwAAAABQVoJpAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlJVgGgAAAACAshJMAwAAAABQVoJpAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlJVgGgAAAACAshJMAwAAAHxErW9s3K7qAB8d1S3dAAAAAAAto7K2Ng/3OXqr6xw98+ESdAN8lJgxDQAAAABAWQmmAQAAAAAoq20WTP/lL3/J4MGDkyQLFizIUUcdlcG
DB2fw4MH59a9/nSSZPn16Tj755Jx22mn5j//4j23VCgAAAAAA25Ftssb0v/zLv+SBBx7ILrvskiR56qmn8k//9E8ZMmRI8zGvvfZapk2blnvuuSeNjY0ZNGhQevfunZqamm3REgDbWGPTutS2qmrpNgAAAIAdwDYJpjt27JgpU6bkoosuSpI8+eSTef755zNjxozss88+GTNmTObPn59DDz00NTU1qampSceOHbNo0aJ069ZtW7QEwDZW26oqh436SUlqzZt0VknqAAAAANunbRJM9+vXLy+//HLzdrdu3XLqqafm05/+dG6++ebcdNNN6dKlS9q2bdt8TJs2bdLQ0PCBtRsbG7Nw4cJt0TY7gK5du7Z0CzsM3yeUm+/PluF7HWDH5zO0ZfgMpSWU8vu9VO/h7bEn4KNhmwTT73bcccelXbt2zV9PmDAhPXv2zMqVK5uPWbly5QZB9fupra31gxtsBt8n8NHgex0AtozPUHZ02+N7eHvsCWhZm/qF1Ta7+eE7fe1rX8v8+fOTJHPmzMnBBx+cbt26Zd68eWlsbEx9fX2ee+65dO7cuRztAAAAAADQgjZrxvTPf/7znHrqqc3bP/nJT3LWWZu//udll12WCRMmpFWrVtljjz0yYcKE1NXVZfDgwRk0aFCKosgFF1yQ2traD38GAAAAAADsUDYZTP/yl7/MQw89lLlz5+axxx5Lkqxbty7PPPPMBwbTe+21V6ZPn54kOfjgg3PXXXe955jTTjstp5122pb2DgAAAADADmiTwfRRRx2VPffcM2+88Ua+8pWvJEkqKyuz9957l6U5AAAAAAB2PpsMpnfdddf06tUrvXr1yrJly9LY2JjkrVnTAAAAAACwJTZrjenLL788Dz/8cD7+8Y+nKIpUVFRsdGkOAAAAAAD4IJsVTP/lL3/J7373u1RWVm7rfgAAAAAA2MltVtK8zz77NC/jAQAAAAAAW2OzZkwvWbIkffv2zT777JMklvIAAAAAAGCLbVYwff3112/rPgAAAAAA+IjYrGD6F7/4xXseGzZsWMmbAQAAAABg57dZwfQee+yRJCmKIk899VTWr1+/TZsCAAAAAGDntVnB9Omnn77B9jnnnLNNmgEAAAAAYOe3WcH0888/3/z1a6+9liVLlmyzhgAAAAAA2LltVjA9bty45q9ra2tz0UUXbbOGAAAAAADYuW1WMD1t2rQsX748L730Uvbaa6906NBhW/cFAAAAwEY0rm1MbXVtS7cBsFU2K5j+zW9+k+9973vZb7/98swzz2TYsGEZOHDgtu4NAAAAgHepra5N7ym9S1Jr1vmzSlIH4MParGD6xz/+ce699960adMmDQ0N+cd//EfBNAAAAAAAW6Rycw6qqKhImzZtkiR1dXWprfXnIgAAAAAAbJnNmjHdsWPHXHPNNenZs2fmzZuXjh07buu+AAAAAADYSW3WjOnTTjstu+66a2bPnp177703Z5555rbuCwAAAACAndRmBdPXXHNNjjvuuIwbNy533313rrnmmm3dFwAAAAAAO6nNCqarq6uz//77J0n23nvvVFZu1jAAAAAAAHiPzVpj+lOf+lQmT56c7t27Z/78+fn4xz++rfsCAAAAAGAntVlTn6+++up06NAhDz/8cDp06JCrr756W/cFAAAAAMBOarNmTNfW1ubss8/exq0AAAAAAPBRYLFoAAAAAADKSjANAAAAAEBZCaYBAAAAACgrwTQAAAAAAGUlmAYAAAAAoKwE0wAAAAAAlJVgGgAAAACAshJMAwAAAPD/tXfvcVaV9f7AP8PMMCCioqBYiIeLtzwiommGQmWl4kGPJKQoxqGjx0oNQ4QUlDCviFRo5PWoiBaadrzVybQX9MIjGd6NNPCCZHgXBWVgYH5/EPOTGBBlz5oB3u//Zs/s7zx7r/08z16f9ay1AAolmAYAAAAAoFCCaQAAAAAACiWYBgAAAACgUIJpAAAAAAAKJZgGAAAAAKBQgmkAAAAAAAo
lmAYAAAAAoFCCaQAAAAAACiWYBgAAAACgUIJpAAAAAAAKJZgGAAAAAKBQgmkAAAAAAAolmAYAAAAAoFCCaQAAAAAACiWYBgAAAACgUIJpAAAAAAAKJZgGAACATVx1TXWTrAXA5quisRsAAAAANKyqiqr0nNizJLVmnDajJHUA2LxZMQ2boJply5tkLQAAAABIrJiGTVJFZXmuGHZ3SWqdOr5vSeoAAAAAwCpWTAMAAAAAUCjBNAAAAAAAhRJMAwAAAABQKME0AAAANEG1NdWN3QQAaDBufggAAABNUFlFVeaN3asktTqe+1RJ6gBAqVgxDQAAAABAoQTTAAAAAAAUSjANAAAAAEChBNMAAAAAABRKMA0AAAAAQKEE0wAAAAAAFEowDQAAAABAoQTTAAAAAAAUSjANAAAAAEChBNMAAAAAABRKMA0AAAAAQKEE0wAAAAAAFEowDQA0qNqa6iZZCwAAgMZT0dgNAAA2bWUVVZk3dq+S1Op47lMlqQMAAEDjsmIaAAAAAIBCCaYBAAAAACiUYBoAAAAAgEIJpgEAAAAAKJRgGgAAAACAQgmmAQAAANggNcuWN8laQNNV0dgNAAAAAGDjVlFZniuG3V2SWqeO71uSOkDTZsU0AAAAAACFEkwDAAAAAFAowTQAAAAAAIUSTAMAAAAAUCjBNAAAAAAAhRJMAwAAAABQKME0AAAAAACFEkwDAAAAAFAowTQAUK/qZcsbuwlrqK6pLlmt5dWlqbWiRHUAAAA2JxWN3QAAoGmqqizPvsNv2uA6s8adWILWrFRVUZWeE3uWpNaM02ZkWq/eG1yn9/RpJWgNAADA5qXBVkw/8cQTGTRoUJLkpZdeynHHHZeBAwfmvPPOy4oVK5IkU6dOTb9+/TJgwID8/ve/b6imAAAAAADQhDRIMH3NNddk1KhRqf7Hqa0XXXRRhg4dmltuuSW1tbV54IEH8vrrr2fy5Mn5+c9/nuuuuy6XX355li5d2hDNAQAAAACgCWmQYLpjx46ZOHFi3c/PPPNM9t9//yRJr1698tBDD+XJJ5/MPvvsk+bNm6d169bp2LFj/vKXvzREcwAAAAAAaEIa5BrThx56aObPn1/3c21tbcrKypIkrVq1ynvvvZdFixaldevWdX/TqlWrLFq06CNrV1dXZ/bs2aVvNBuFPfbYo7GbsFnS51gf+mfjaMj+aZuuP+MksCGMt41jYxi7m/JnY2N4/5qiprxNmxqfMdj0FXLzw2bN/v/C7MWLF2errbbKlltumcWLF6/2+IeD6rWpqqoykEPB9DlouvTPpsF2ANj4GLs3jPePhuYzBpuGdR1karCbH37YZz7zmcycOTNJMn369Oy3337p1q1bZs2alerq6rz33nuZO3dudt111yKaAwAAAABAIypkxfSIESMyevToXH755encuXMOPfTQlJeXZ9CgQRk4cGBqa2tzxhlnpKqqqojmAAAAAADQiBosmO7QoUOmTp2aJOnUqVNuvvnmNf5mwIABGTBgQEM1AQAAAACAJqiQS3kAAAAAAMAqgmkAAAAAAAolmAYAAAAAoFCCaQAAAAAACiWYBgAAAACgUIJpAIANULNseZOsBQAA0JRVNHYDAAA2ZhWV5bli2N0lqXXq+L4lqQMAANDUWTENAAD/YAU8AAAUw4ppAAD4ByvgAQCgGFZMAwAAAABQKME0AAAAAACFEkwDAAAAAFAowTQAAAAAAIUSTAMA0Ciqa6qbZC0AAKDhVTR2AwAA2DxVVVSl58SeJak147QZJakDUArVy5anqrK8sZsBAE2aYBoAAABKqKqyPPsOv2mD68wad2IJWgMATZNLeQAAAAAAUCjBNAAAAAAAhRJMAwAAAABQKME0AAAAAACFEkwDAAAAAFAowTQAAAAAAIXa5IPp6mXLm1QdAAAAAIDNXUVjN6ChVVWWZ9/hN21wnVnjTixBawAAAAAA2ORXTAMAUNqzv5xJBgAAbKhNfsU0AAClO4sscSYZAAC
w4ayYbgTVNdVNshYAAACwOmcKATQMK6YbQVVFVXpO7FmSWjNOm1GSOgAAAMCanHUE0DCsmAYA4GOpdcYWAACwgayYBgDgYymrqMq8sXttcJ2O5z5VgtYAAAAbIyum15OVQQAAAJCsqC7N/nGp6gCwcbJiej2VamVQYnUQAAAAG69mVVWZ1qv3BtfpPX1aCVoDwMbKimkAADZ6Vt0BAMDGxYppAAA2elbvAQDAxsWKaQAAAAAACiWYBgAAAIBGUl1TukuSlbIWNDSX8gAAAACARlJVUZWeE3uWpNaM02aUpA4UwYppAAAAAAAKJZgGAAAAAKBQgmkAAAAAAAolmAYAAAAAoFCCaQAAAAAACiWYBgAAAACgUIJpAAAAAAAKJZgGAAAAAKBQgmkAAAAAAAolmAYAAAAAoFCCaQAAAAAACiWYBgAAAACgUIJpAAAAAAAKJZgGAAAAAKBQgmkAAAAAAAolmAYAAAAAoFCCaQAAAAAACiWYBgAAAACgUIJpAAAAAAAKJZgGAAAAAKBQgmkAAAAAAAolmAYAAAAANjrVy5Y3yVqsn4rGbgAAAAAAwMdVVVmefYffVJJas8adWJI6rD8rpgEAAADYLFhhC02HFdMAAAAAbBassIWmw4ppkiQ1JTzKV8paAAAAALAxqa6pblJ1miorpkmSVFSW54phd5ek1qnj+5akDgAAAABsbKoqqtJzYs8NrjPjtBklaE3TZcU0AAAAAHxMtZv4alZoaFZMAwAAAMDHVFZRlXlj99rgOh3PfaoErYGNjxXTAAAAAAAUSjANAAAAAEChBNPAJqFU1/ZyjTAAAACAhuca08AmwbW9AAAAADYeVkwDAAAAAFAowTQAAAAAAIUSTAMAAAAAUCjBNAAAAAAAhRJMAwAAAABQKME0AAAAAACFEkwDAAAAAFAowfRGbkV1dWM3AQCAjVhtTWm+T5aqDgAAm4eKxm4AG6ZZVVWm9eq9wXV6T59WgtYAALCxKauoyryxe21wnY7nPlWC1gAAsLmwYhoAAAAoXM2y5U2yFgDFsGIaAAAAKFxFZXmuGHZ3SWqdOr5vSeoAUBwrpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaAAAAAIBCCaYBAIANVl1T3SRrAQBsrFZUl+47USlrlUpFYzcAAADY+FVVVKXnxJ4lqTXjtBklqQMAsDFrVlWVab16l6RW7+nTSlKnlAoNpv/93/89rVu3TpJ06NAhp5xySkaOHJmysrLssssuOe+889KsmUXcAAAAAACbssKC6ep/LBefPHly3WOnnHJKhg4dmgMOOCDnnntuHnjggXzlK18pqkkAAAAAADSCwpYn/+Uvf8kHH3yQIUOG5MQTT8zjjz+eZ555Jvvvv3+SpFevXnnooYeKag4AAAAAAI2ksBXTLVq0yDe/+c30798/L774Yk466aTU1tamrKwsSdKqVau89957RTUHAAAAAIBGUlgw3alTp+y8884pKytLp06dss022+SZZ56p+/3ixYuz1VZbfWSd6urqzJ49e73/7x577PGJ2suG+Tjb6OOwPRtHQ23PUirlZ2NjeL1Nkf7ZOBry82qbNg5z6KalobZnx3/pnFYtqxqkdlNRqveu0790TosSvFdLPqjOCy8+X4IWrUn/bBzm0E2POXTTYnt+cpvTPm0pt2dtTXXKKny/KlJhwfTtt9+e5557LmPGjMmrr76aRYsWpWfPnpk5c2YOOOCATJ8+PZ/73Oc+sk5VVdVmMYhs7GyjTcvmtj03t9fLxs3nddNjm25aGnJ77jv8ppLUmTXuxJLUKbVSvndXDLt7g2ucOr6v/rmJsT03PbbppsX2/OS8d59MWUVV5o3dqyS1Op77VEnqlFpjfDbWFYYXFkwfc8wx+f73v5/jjjsuZWVlufDCC9OmTZuMHj06l19+eTp37pxDDz20qOYAAAAAANBICgummzdvnvHjx6/x+M0331xUEwAAAAAAaAKaNXYDAAAAAADYvAi
mAQAAAAAolGAaAAAAAIBCCaYBAAAAADZhNcuWN6k6SYE3PwQAAAAAoHgVCxS1ywAAHXhJREFUleW5YtjdG1zn1PF9S9CalayYBgAAAIBNwIrq6iZZC+pjxTQAAAAAbAKaVVVlWq/eJanVe/q0ktSBtbFiGgAAAACAQgmmAQAAAAAolGAaAAAAAFhNzbLlTaoOmx7XmAYAAAAAVlNRWZ4rht29wXVOHd+3BK1hU2TFNNBoqh01BQAAANgsWTENNJqqyvLsO/ymktSaNe7EktQBAAAAoOFZMQ0AAAAAQKEE0wAAAAAAFEowDQAANCkrqqsbuwkAADQw15gGAACalGZVVZnWq3dJavWePq0kdQAAKC0rpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaYCNQs2x5k6oDAAAAsCHc/BBgI1BRWZ4rht29wXVOHd+3BK0BAAAA2DBWTAMAAAAAUCjBNAAAAAAAhRJMAwAAAABQKME0AAAAAACFEkwDAAAAAFAowTQAAAAAAIUSTAMAAAAAUCjBNAAAAAAAhRJMAwAAAABQKME0AAAAAACFEkwDNJAV1dWN3QQAAACAJqmisRsAsKlqVlWVab16l6RW7+nTSlIHAAAAoCmwYhoAAAAAgEIJpgEAAAAAKJRgGuBDqmtcFxoAAACgobnGNMCHVFVUpefEniWpNeO0GSWpAwAAALCpsWIaAAAAAIBCCaYBAAAAACiUYBoAAAAAgEIJpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaAAAAAIBCCaYBAAAAACiUYBoAAAAAgEIJpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaAAAAAIBCCaYBAAAAACiUYBoAAAAAgEIJpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaAAAAAIBCCaYBAAAAACiUYBoAAAAAgEIJpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaAAAAAIBCCaYBAAAAACiUYBoAAAAAgEIJpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaAAAAAIBCCaYBAAAAACiUYBoAAAAAgEIJpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaAAAAAIBCCaYBAAAAACiUYBoAAAAAgEIJpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaAAAAAIBCCaYBAAAAACiUYBoAAAAAgEIJpgEAAAAAKJRgGgAAAACAQgmmAQAAAAAolGAaAAAAAIBCVTR2A1asWJExY8bk2WefTfPmzfPDH/4wO++8c2M3CwAAAACABtLoK6Z/97vfZenSpfnFL36RYcOG5eKLL27sJgEAAAAA0IAaPZieNWtWDj744CRJ9+7d8/TTTzdyiwAAAAAAaEiNHkwvWrQoW265Zd3P5eXlqampacQWAQAAAADQkMpqa2trG7MBF110Ufbee+/06dMnSdKrV69Mnz59rX//+OOPp6qqqqjmAQAAAADwCVRXV6d79+71/q7Rb37Yo0eP/P73v0+fPn3y+OOPZ9ddd13n36/thQAAAAAAsHFo9BXTK1asyJgxY/Lcc8+ltrY2F154Ybp06dKYTQIAAAAAoAE1ejANAAAAAMDmpdFvfggAAAAAwOZFMA0AAAAAQKEE0wAAAAAAFEow3YBmzpyZAw88MIMGDcqgQYPSr1+/nH766Vm6dOl617j66qvz5JNPNmAr+biuvvrqDB48OEOGDMk3v/nNPP300xtU7+abby5Ry1gf/9wvBwwYkMmTJ2fQoEGZO3fuWp/3yCOP5C9/+UuSZMqUKTnqqKNy3333FdVsPqGZM2dmt912W2Nb9e3bNyNHjsypp57aSC3b/HzcOXHkyJGZPn36etW+44478oUvfKGu9te//vW6bf6lL30p1dXVH6utPhelVd+8+VFj7vq4//778+qrr5aolayv+fPnp0ePHnX9bdCgQbniiityxRVXrPU5d9xxRy677LI1Hv/w3KrfNS1
XX311DjrooHWOn88++2weeeSRJMkZZ5zxsfZx2HD1ja0XXHBBXnnllU9cc+LEibn11lvX+vtXXnklDz74YJJs8P9i7dbV/2699dZMnDixJP/HPNp41rZPuiHmz5+fAQMGlKiFrI/6xuEPz43r48PfhTZXFY3dgE3d5z73uUyYMKHu52HDhuXBBx/MYYcdtl7PP/nkkxuqaXwCc+bMyYMPPphbb701ZWVlmT17dkaMGJG77rrrE9ecNGlSTjjhhBK2ko/y4X65dOnSHHbYYWnduvU6n/PLX/4yffr0ye677577778/l156aXbbbbcimssG6ty5c+6555706dMnycod6Q8++CBJ1hmkUHobOieuy7/927/lzDPPTJK88847OfLII3P44Yd/olo+F6Wztnlz66233uDaN910U8aMGZMddtihBC3l4+jatesG70Anq8+t+l3Tcvfdd6dPnz659957069fv3r/5re//W3atm2bz372s6uN7TS8htgnWR8PP/xwnn/++XzpS1/KOeec06D/a3O2Pv2vFMyjjau+fdKjjjoqW221VSO3jPWxtnH4K1/5St3cuD4+/F1ocyWYLtDSpUvz2muvZeutt84555yTBQsW5O23306vXr0ydOjQvPjiixk1alSWLVuWFi1aZMKECbn00kvTp0+fvPHGG5k2bVqWLFmSefPm5aSTTkq/fv3y5JNP5gc/+EFatWqV7bbbLlVVVbn44osb+6Vusrbddtu88soruf3229OrV6/sscceuf322zNo0KB06tQpL7zwQmprazNhwoS0a9cuF198cWbNmpVkZWjyjW98IyNHjsw777yTd955J717987ChQszZsyYfOMb38j3v//9VFRUpLy8PJdeeqkvCQVYtGhRmjVrlvLy8iTJggULMmbMmFRXV+edd97Jd77znbRv3z5/+MMf8swzz+TJJ5/M008/nXPOOScTJkzI/fffn3vvvTcVFRXZb7/9Mnz48EycODGPPfZY3n///VxwwQUZOXJkdtxxx8yfPz9HHHFE/vrXv+bPf/5zvvCFL+R73/teI78Dm77dd989L774Yt59991stdVWueuuu9K3b9/8/e9/T8+ePTNjxoxMmTIlv/rVr9KsWbP06NEjI0aMqHdM3nbbbRv75WwyPjwn1jdWrrJs2bKcd955eemll7JixYoMHTo0BxxwwDprv/fee2nRokXKysrqHnvuuedy8cUXZ8WKFXn33XczatSovP/++5k6dWp+8pOfJEmOPfbY/OQnP8nRRx+dGTNmZNCgQdl9993z17/+NYsWLcqPf/zjfPrTn86VV16Z3/3ud9l2223zwQcf5Lvf/e5HtmlztbZ585vf/GauvPLKvPHGG/nggw9y+eWXZ6eddqr3szB//vycc845qampSVlZWUaNGpUFCxbU7QDccsst+cUvfpF77rknZWVl6dOnT0488cTV5turrrqqJGE49Zs5c2Z+/vOfZ8KECbntttsyZcqUbL311qmsrKw7KPjEE09kyJAheeutt3Lcccdlzz33rJtbu3btmv79++t3TcTMmTPTsWPHHHvssRk+fHj69euXJ554IhdccEFqa2uzww47ZPTo0bnzzjtTWVmZPffcM0OHDs2vf/3rvP7662v019133z1f/epX06NHj7zwwgvZbrvtMnHixLrvXnx869onGTNmTO6777689NJLefvtt7Nw4cIMHDgwv/3tb/PCCy/kkksuSdu2bfO9730vU6dOTZIMGDAgl19+eV395cuX59xzz11tf/W0007L1VdfnSVLlmSfffbJDTfckDFjxqRdu3YZPnx4Fi1alOXLl+e73/1uDjzwwPTt2zf7779/nn322ZSVleWnP/3pRy4Eof7+96c//SkXXnhhtt566zRr1izdu3fPTTfdlHfffTennnpqli5dmiOPPDJ33XXXWufD5s2b529/+1tee+21XHzxxXn99dfr5tFx48ZlxIgRa3we7rzzztX2aR5
66KE1alMaq/ZJBw8enHHjxqVLly659dZb88Ybb+Sggw6q659vv/123n///YwfP36Nx2666aa6en/84x8zYcKElJeXZ6eddsrYsWNTWVnZKK9tU1XfODxp0qQMGjSobm48++yz8y//8i9p3rx5zjrrrHXmDF27ds0TTzyRG264Ic2aNcu+++6bM888M2+99VbOPPPMLF26NJ06dcrDDz+cq6++OsOHD8/tt9+eJBk6dGiGDBmSbt26NfK78skIphvYww8/nEGDBuXNN99Ms2bNMmDAgOy0007p3r17+vfvn+rq6rpg+pJLLsnJJ5+cXr165b777suf//zn1WotWrQo1113XV588cWccsop6devX84777xceuml2WWXXTJhwgSn4jSwbbfdNpMmTcrNN9+cK6+8Mi1atMgZZ5yRJOnRo0fGjh2bKVOm5KqrrkrPnj0zf/78TJ06NTU1NRk4cGA+97nPJVl5dHTw4MFJVl7KY8yYMZkyZUr23HPPjBw5Mn/605+ycOFCwXQDWdUvy8rKUllZmdGjR+faa69Nkjz//PP5j//4jxxwwAF59NFHM3HixPz3f/93Dj744PTp0ye9evXKzJkzM2bMmLz//vv59a9/nZ///OepqKjIaaedlt///vdJVq7SHTVqVObPn5+XX345119/fZYsWZJDDjkk06dPT8uWLfPFL35RMF2Qr3zlK7n//vvrDuiddNJJ+fvf/173+zvuuCOjR49O9+7dc8stt6SmpqbeMfmggw5qxFex8atvTlyyZMlax8okue2229KmTZtceOGFefvtt3PCCSfk3nvvXaP2PffckyeeeCJlZWVp2bJlLr300tV+P2fOnIwYMSK77bZb7r777txxxx05//zz88Mf/jALFy7M66+/njZt2mT77bdf7XndunWrOxB17733plevXvnDH/6Q22+/PcuWLUvfvn0b5s3aRKxr3uzdu3eOOuqoTJw4Mb/5zW/StWvXej8LV155ZQYNGpQvf/nLmT17ds4+++zccccd2WOPPTJmzJjMmzcv9913X2655ZaUlZVl8ODBdX31w/MtpTNnzpwMGjSo7uf+/fsnSd56661ce+21+dWvfpXmzZuvFlpUVFTkuuuuy9/+9recfPLJue++++rm1k996lOr1dfvGtdtt92W/v37p3PnzmnevHmeeOKJjB49OhMmTEiXLl0yZcqUvPHGGzn66KPTtm3b1XaEL7300nr768svv5wbb7wxO+64Y4499tg89dRT6d69eyO+yo3busbWVVq0aJHrrrsuV199daZNm5af/exn+eUvf5l77713tQPA9fn73/9e7/7qySefnOeffz6HHHJIbrjhhiQrz/78/Oc/n2984xt59dVXc9xxx+V3v/tdFi9enCOOOCKjR4/OsGHDMn369BxxxBEN9ZZsMurrfxdddFHGjx+fTp065bzzzkuSHHXUURk4cGC+853v5IEHHsgXv/jFdc6Hn/rUpzJ27NhMnTo1v/jFLzJ27Ni6eXRdgeWqfZo5c+bUW7tz586FvC+bonXtk37YPvvsk8mTJ+edd97JKaeckksuuSR77rnnGo+tUltbm9GjR+eWW27Jdtttlx/96Ee58847XeajxNY2Dn94bnz//ffz7W9/O5/5zGfy0EMPrTNn2GKLLTJx4sT88pe/TMuWLTN8+PDMmDEj06ZNyyGHHJLjjz8+M2bMyIwZM9KpU6e0aNEic+bMSdu2bTN//vyNNpROBNMNbtXpGW+//XaGDBmSDh06ZJtttslTTz2Vhx9+OFtuuWXd9dheeOGF7LPPPklSt7rknnvuqau1amn/jjvuWPec1157LbvsskuSZN9993XN2wb20ksvZcstt8xFF12UJHnqqady8sknp23btnVBSo8ePfLggw+mffv22W+//eommr333rvuepqdOnVao/YxxxyTa665Jv/5n/+Z1q1br/HlktL558sJJKn7EtCuXbtMmjQpt99+e8rKylJTU7P
WOs8//3z23nvvui9z++23X/76178mWX0b77TTTmndunWaN2+etm3bZptttkmS1VZz0rD69u2bMWPGZKeddsp+++23xu8vuuiiXH/99bnsssvSvXv31NbW1jsms2HqmxPnzp271rEyWbnSedasWXX3W6ipqckTTzxRd73aI488MuXl5atdyqM+22+/fX7605+mRYsWWbx4cbbccsuUlZXlyCOPzD333JP58+fnmGOOWeN5n/nMZ5Ik7du3zxtvvJG5c+dmr732Snl5ecrLy/Ov//qvpXyLNjnrmjdXvXdt27ate2/r+yzMnTu37nTIPfbYIwsWLFjtfzz33HN55ZVX6gLohQsXZt68eUnqn2/ZcP98KY+ZM2cmSebNm5cuXbqkZcuWSVI3hiYr+1JZWVnatWuXJUuWrLO+ftd4Fi5cmOnTp+ett97K5MmTs2jRotx88815880306VLlyTJ8ccfnyR11xr+sLX11zZt2mTHHXdMsnJf5uNe+5/VrWtsXWVVP2rdunW6du2aJNl6663rfe9ra2tX+3lt+6v1mTt3bt3Boh122CFbbrll3nrrrdXaYJuvn7X1v1dffbVuPuvRo0fmzZuXrbfeOnvssUdmzZqVO++8MyNGjMizzz671vlwjz32SLJyXH300UfX2Y4Pfx5W/d+1zbWC6U9uXfukyerbYfHixfnOd76T008/PXvuuWe9j82fPz/JyoPEr732WoYOHZokWbJkSXr27NnQL2ezs7Zx+IgjjlhtLF7Vhz4qZ5g3b17eeuutusv5Ll68OC+//HLmzp2bo48+OklW24/t379/7rjjjnzqU5/KkUce2aCvtaG5+WFB2rRpk3HjxmXUqFG54YYb0rp164wfPz5DhgzJkiVLUltbmy5duuSpp55Kktx1111rXLuvvhCrffv2mTNnTpKVp0jSsJ599tm60y+SlYNM69atU15eXncTxEcffTRdu3ZNly5d6k5HXrZsWR577LHsvPPOSVbflqsmnAceeCD77rtvbrzxxhx22GH1Hi2l4f34xz/OUUcdlXHjxuWAAw6o2z5lZWVrfGnv3LlznnzyydTU1KS2tjaPPPJI3cTTrNn/H14F0I1vp512yvvvv5/JkyfXO3FPnTo1P/jBD3LzzTdn9uzZeeyxxz5yTOaT+/Cc2LZt27WOlcnKfnbEEUdk8uTJueaaa3LYYYdlr732yuTJkzN58uS6lZof5YILLsjpp5+eSy65JLvuumtdf/7a176W3/zmN3nkkUfSu3fvj6zTtWvXPPXUU1mxYkWWLl26xtlNrG5d8+Y/W9u82aVLl/zpT39KksyePbvuy/6qcblz587p2rVrbrrppkyePDn9+vXLrrvuWvc3FKdjx455/vnns2TJkqxYsWK1G3jXty3qm1vro98V56677srXvva1XH/99bnuuusyderUzJgxI1VVVXnxxReTrLzZ0/3335+ysrKsWLFiteevq79SOusztq7rPa+qqsqbb76Z5cuX5913360LtFa544476t1fbdas2Tq3+auvvpp3333XIoxPaG39r3nz5nUH7Vd9N01WXnLjxhtvzJIlS9KlS5ePPR+uGoPX9XlYtU+zrtqUTvPmzfP6668nSd1ct3Tp0px++uk5/vjj8/nPf36tj63Spk2btG/fPj/96U8zefLknHLKKS591QDWNg5vs802q42Tq/rQR+UMHTp0yI477pjrr78+kydPzgknnJC99947u+66ax577LEkyeOPP15X97DDDsuMGTNy//33b/TBtBXTBeratWsGDRqU2bNn54UXXsisWbPSsmXL7Lzzznnttddy1lln5dxzz82kSZPSokWLjBs3Ls8888w6a5533nk5++yzs8UWW6SystKlHxrYV7/61cydOzf9+/fPFltskdra2px11lm58cYbc+edd+aGG26oO4W8TZs2+eMf/5ivf/3rWbZsWQ477LC6o5sf1qVLl5x55pk5/fTT665P3KxZs3z/+99
vhFfIYYcdlgsuuCBXXXVVdtxxx7z99ttJkr333juXXXZZOnToUPe3u+22Ww4//PAcd9xxWbFiRfbdd998+ctf3uzvqttU9enTJ//zP/+TTp065eWXX17td7vttluOOeaYtGnTJjvssEP23nvvesdkSmfVnPjggw+mQ4cOax0rjz322IwaNSonnHBCFi1alIEDB6524Gd9HXnkkfn2t7+d7bbbLu3bt6/r2zvssENatWqV7t27p6Lio78W7bbbbundu3cGDBiQNm3apLKycr2et7la17z5z774xS/WO2+eddZZGT16dK6//vrU1NTkggsuSLJyNe5ZZ52V66+/PgceeGCOO+64LF26NN26dfN9qJFsu+22OemkkzJw4MBss802qa6uTkVFxVrPPqpvbq2Pflec2267bbVLIbVs2TJf/epX07Zt25x99tlp1qxZ2rVrl8GDB6eysjKXXnpp3UrqJGvtr5TWxxlb69OuXbv07NkzxxxzTDp27LjaAeEkOfDAA/O9731vjf3VXXfdNZMmTVptnv6v//qvnH322fnf//3fLFmyJGPHjtU/P6G19b/27dtnxIgRadWqVVq1alV3z4T9998/o0ePzre+9a0kK8+w/jjz4Yfn0XV9Hj5JbT6ZE088MWPHjs2OO+5Yd3m5m266Kc8880xqampy6623JkkOPvjgNR4bOXJkkpVB6DnnnJOTTz45tbW1adWq1RqXuGPDrW0crqioWGNuTD46Z/jRj36UwYMHZ9CgQVm+fHk+/elP5/DDD89JJ52Us846K7/+9a+z/fbb142vVVVV+exnP5u33nqr7mDgxqqsdn2WKdBkTZkyJYcffni23XbbTJgwIZWVlTn11FMbu1mbnVU3GvnnwQeApm/VTnV9O2L/7M0338xvfvObHH/88Vm6dGmOOOKI3HjjjWtcIxc2RzU1NbnmmmvqQpLjjz8+Q4cOXe8706+NfgcAbI6mTZuWNm3apFu3bnnooYfys5/9rO5Gl2PGjMmhhx6aAw88sJFbuWEcytzIbbfddhkyZEi22GKLtG7dOhdffHFjNwkANgpLlizJwIEDc/DBB69XKJ2sPD3y6aefzte+9rWUlZWlf//+wjH4h4qKinzwwQc5+uijU1lZmW7dutV7Xf+PS78DADZHHTp0yNlnn53y8vKsWLEi55xzTpJkyJAh2X777Tf6UDqxYhoAAAAAgIK5+SEAAAAAAIUSTAMAAAAAUCjBNAAAAAAAhXLzQwAAKLGXX34548aNy4IFC9KiRYu0aNEiw4cPzy677NLYTQMAgCbBzQ8BAKCEPvjgg/Tv3z/nn39+9tlnnyTJk08+mXHjxmXy5MmN3DoAAGgaBNMAAFBC9913Xx599NGMGjVqtcdra2uzYMGCjB49OtXV1amqqsr555+f5cuXZ9iwYWnfvn1efvnl7LXXXvnBD36QiRMn5rHHHsv777+fCy64IA899FDuueeelJWVpU+fPjnxxBMb6RUCAMCGcykPAAAoofnz56djx451P3/rW9/KokWL8tprr6V9+/YZMmRIevfunf/7v//LZZddljPOOCMvvvhirrvuurRs2TJf/vKX8/rrrydJOnfunFGjRmXOnDm57777csstt6SsrCyDBw/OQQcdlM6dOzfWywQAgA0imAYAgBJq3759nn766bqfJ02alCQZMGBAHn/88Vx11VW59tprU1tbm8rKyiRJx44ds+WWWyZJ2rVrl+rq6iRJp06dkiTPPfdcXnnllQwePDhJsnDhwsybN08wDQDARkswDQAAJXTIIYfkmmuuyeOPP57u3bsnSV566aUsWLAg3bp1yxlnnJEePXpk7ty5eeSRR5IkZWVl9dZq1qxZkpUrp7t27Zprr702ZWVlueGGG7LrrrsW84IAAKABCKYBAKCEWrVqlUmTJmX8+PG57LLLUlNTk4qKipx//vnp3LlzxowZk+rq6ixZsiTnnHPOetXcfffdc+CBB+a4447L0qVL061
bt+ywww4N/EoAAKDhuPkhAAAAAACFatbYDQAAAAAAYPMimAYAAAAAoFCCaQAAAAAACiWYBgAAAACgUIJpAAAAAAAKJZgGAAAAAKBQgmkAAAAAAAolmAYAAAAAoFD/DxzAfYZqsnalAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
# The five Year values that occur most often in the data set.
top_years = data.Year.value_counts().head(5).index

# Keep only the rows that belong to one of those years.
top_year_rows = data.loc[data.Year.isin(top_years), :]

plt.figure(figsize=(25, 10))

# One group of bars per genre, one bar per year inside each group. The title
# call stays last so the cell's displayed value is the same as before.
genre_ax = sns.countplot(x='Genre', hue='Year', data=top_year_rows)
genre_ax.set_title('Distribution of genres for top five producing years')
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NA_SalesEU_SalesJP_SalesOther_Sales
Year
198010.590.670.000.12
198133.401.960.000.32
198226.921.650.000.31
19837.760.808.100.14
198433.282.1014.270.70
\n", + "
def update(i):
    """Draw animation frame ``i``: cumulative sales by region up to ``df1.index[i]``.

    Written as the ``func`` callback for ``animation.FuncAnimation``. It reads
    the notebook-level ``df1`` (indexed by year, one column per sales region)
    and redraws the pie chart on the notebook-level ``ax``.

    Parameters
    ----------
    i : int
        Zero-based frame number, used as a positional row index into ``df1``.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the pie chart was drawn on.
    """
    # Wipe the previous frame so wedges and percentage labels do not pile up.
    ax.clear()
    ax.axis('equal')

    # Rows 0..i inclusive, so the summed data covers exactly the year range
    # named in the title (``head(i)`` would stop one year short and the last
    # year would never be shown). Frame 0 needs no special case any more.
    cumulative_sales = df1.iloc[: i + 1].sum()

    # Pass ``ax`` explicitly instead of relying on whichever axes happens to
    # be current when the animation is rendered.
    return cumulative_sales.plot.pie(
        ax=ax,
        startangle=270,
        autopct='%.1f%%',
        title="Sales by region {}-{}".format(df1.index.min(), df1.index[i]),
        label='',
    )
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# equivalent to rcParams['animation.html'] = 'html5'\n", + "rc('animation', html='html5')\n", + "fig, ax = plt.subplots()\n", + "animator = animation.FuncAnimation(fig, update, frames=df1.shape[0], repeat=False)\n", + "animator" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Resources: \n", + "- You will need to install FFmpeg and setup PATH variables before you can use it\n", + "- https://medium.com/@suryadayn/error-requested-moviewriter-ffmpeg-not-available-easy-fix-9d1890a487d3\n", + "- https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-full.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## What useful insights would you like to take to the stakeholders planning to come up with a new game?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Take away activity : Understand the data more, complete the analysis keeping in mind the above business stakeholder question or any other and share your notebook on slack" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Visualize Indian Election Data Analysis using Pandas/notebook/Visualization_Code_Along_MK.ipynb b/Visualize Indian Election Data Analysis using 
Pandas/notebook/Visualization_Code_Along_MK.ipynb new file mode 100644 index 0000000..6d41d0d --- /dev/null +++ b/Visualize Indian Election Data Analysis using Pandas/notebook/Visualization_Code_Along_MK.ipynb @@ -0,0 +1,749 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Indian Election Analysis\n", + "\n", + "India's lower house of Parliament, the Lok Sabha, has 543 seats in total. Members of Lok Sabha (House of the People) or the lower house of India's Parliament are elected by being voted upon by all adult citizens of India, from a set of candidates who stand in their respective constituencies. Every adult citizen of India can vote only in their constituency. Candidates who win the Lok Sabha elections are called 'Member of Parliament' and hold their seats for five years or until the body is dissolved by the President on the advice of the council of ministers.\n", + "\n", + "There are more than 700 million voters with more than 800,000 polling stations.\n", + "\n", + "The Lok Sabha election is a very complex affair as it involves a lot of factors. It is this very fact that makes it a perfect topic to analyze.\n", + "\n", + "Currently there are two major parties in India, Bhartiya Janta Party(BJP) and Indian National Congress(INC).\n", + "\n", + "As India is country of diversities, and each region is very different from every other region, there are a lot of regional or state parties having major influences. These parties can either support any of the alliance to make a government or can stay independent.\n", + "\n", + "There are two major alliances, the NDA led by BJP and the UPA led by INC.\n", + "\n", + "## About the data set\n", + "\n", + "### There are two datasets:\n", + "\n", + "#### 1. 
2009 Candidate dataset:\n", + "\n", + "The candidate dataset has 15 features namely 'ST_CODE', 'State_name', 'Month', 'Year', 'PC_Number', 'PC_name', 'PC_Type', 'Candidate_Name', 'Candidate_Sex', 'Candidate_Category', 'Candidate_Age', 'Party_Abbreviation', 'Total_Votes_Polled', 'Position','Alliance'.\n", + "\n", + "#### 2. 2009 Electors dataset\n", + "\n", + "The elector dataset consist of 8 features namely 'STATE CODE', 'STATE', 'PC NO', 'PARLIAMENTARY CONSTITUENCY','Total voters', 'Total_Electors', 'TOT_CONTESTANT', 'POLL PERCENTAGE'." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading necessary Libraries and dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "STATE CODE 0\nSTATE 0\nPC NO 0\nPARLIAMENTARY CONSTITUENCY 0\nTotal voters 0\nTotal_Electors 0\nTOT_CONTESTANT 0\nPOLL PERCENTAGE 0\ndtype: int64" + }, + "metadata": {}, + "execution_count": 21 + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "# Read both the datasets\n", + "electors_2009 = pd.read_csv('../data/LS2009Electors.csv')\n", + "# quick check for any null values\n", + "electors_2009.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "(8070, 15)" + }, + "metadata": {}, + "execution_count": 22 + } + ], + "source": [ + "candidate_2009 = pd.read_csv(\"../data/candidate09.csv\")\n", + "# see the shape \n", + "candidate_2009.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 1 : Plot a bar chart to compare the number of male and female candidates in the election" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + 
"outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
# Number of candidates per value of Candidate_Sex, most frequent first.
sex_counts = candidate_2009['Candidate_Sex'].value_counts()

gender_ax = sex_counts.plot.bar(rot=0, title='Gender comparison 2009 elections')

# NOTE(review): the tick labels are assigned by position, so this assumes the
# most frequent category is male and the second one female - confirm against
# the actual values of Candidate_Sex.
gender_ax.set_xticks((0, 1))
gender_ax.set_xticklabels(('Male Candidates', 'Female Candidates'))

gender_ax.set_ylabel('No. of candidates')
gender_ax.set_xlabel('Gender')
plt.show()
Compare them and note an observation" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": " winner all\ncount 541.000000 8070.000000\nmean 53.059150 45.837673\nstd 11.215739 11.831528\nmin 26.000000 25.000000\n25% 45.000000 37.000000\n50% 53.000000 45.000000\n75% 60.000000 54.000000\nmax 88.000000 99.000000", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
winnerall
count541.0000008070.000000
mean53.05915045.837673
std11.21573911.831528
min26.00000025.000000
25%45.00000037.000000
50%53.00000045.000000
75%60.00000054.000000
max88.00000099.000000
\n
" + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "# Selecting the subset of the data with winner candidates\n", + "# looking at their summary statistics\n", + "pd.DataFrame({'winner':candidate_2009[candidate_2009.Position == 1].Candidate_Age.describe(),\n", + " 'all':candidate_2009.Candidate_Age.describe()})\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Median age is 53 for winners whereas it is 45 for all candidates, indicating winners are a bit older (or more experienced)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
# Ages of the candidates whose Position is 1. The name ``winner`` is kept
# because the side-by-side histogram cell reads it again.
winner = candidate_2009.loc[candidate_2009.Position == 1, 'Candidate_Age']

# Histogram of those ages; the axis labels are set on the returned Axes.
winner_ax = winner.plot.hist(bins=25, title='Winner Candidates')
winner_ax.set_xlabel('Age of the Candidates')
winner_ax.set_ylabel('Number of Candidates')
plt.show()
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
# Two panels in one row: every candidate on the left, winners on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, tight_layout=True)

# (axes, ages, panel title, extra keyword arguments for hist)
panels = (
    (ax[0], candidate_2009.Candidate_Age, 'All the Candidates', {}),
    (ax[1], winner, 'Winner Candidates', {'color': 'red'}),
)

# Both panels share the same bin count and axis labels; only the data,
# the title and the bar colour differ.
for panel_ax, ages, heading, hist_style in panels:
    panel_ax.hist(list(ages), bins=25, **hist_style)
    panel_ax.set_xlabel('Age of the Candidates')
    panel_ax.set_ylabel('Number of Candidates')
    panel_ax.set_title(heading)

plt.show()
# Overlay the age histogram of the winners on top of the one for all
# candidates to compare the two distributions on a common scale.
bins = 25
all_ages = candidate_2009.Candidate_Age
winner_ages = candidate_2009.loc[candidate_2009.Position == 1, 'Candidate_Age']

# With an integer bin count each histogram derives its edges from its own
# min/max, so the two sets of bars would not line up. Compute the edges once
# from the full age range and reuse them for both series.
bin_edges = np.histogram_bin_edges(all_ages.dropna(), bins=bins)

plt.figure(figsize=(8, 8))

all_ages.plot.hist(bins=bin_edges, label='all')
winner_ages.plot.hist(bins=bin_edges, label='winners')
plt.legend(loc='upper right')
plt.title('Age comparison winners vs all')
plt.xlabel('Age')
plt.ylabel('No of candidates')
plt.show()
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Party_AbbreviationTotal_Votes_Polled
0TDP372268.0
1INC257181.0
2PRAP112930.0
3BJP57931.0
4BSP16471.0
.........
8065IND422.0
8066IND378.0
8067IND378.0
8068IND375.0
8069IND298.0
\n

8070 rows × 2 columns

\n
" + }, + "metadata": {}, + "execution_count": 28 + } + ], + "source": [ + "# Lets see the features required for this plot\n", + "candidate_2009[['Party_Abbreviation', 'Total_Votes_Polled']]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
# Total votes polled per party, largest first, limited to the ten biggest.
# The totals are divided by one million so the numbers on the y axis agree
# with the 'in millions' axis label (raw counts were plotted before).
top_party_votes = (
    candidate_2009
    .groupby('Party_Abbreviation')['Total_Votes_Polled']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .div(1e6)
)

top_party_votes.plot.bar(rot=0, title='Vote shares')
plt.ylabel('No of votes in millions')
plt.xlabel('Party')
plt.show()
## Adapted from https://stackoverflow.com/a/56780852/8210613 to show values on the bars
def show_values_on_bars(axs, h_v="v", xspace=0.4, yspace=0.4, unit='%'):
    """Write the value of every bar next to it as a text label.

    Parameters
    ----------
    axs : Axes-like object or numpy.ndarray of them
        Each object must expose ``patches`` (the bars) and ``text``. An array
        of any shape is visited element by element.
    h_v : str
        ``"v"`` labels vertical bars with their height, centred at the top of
        the bar. ``"h"`` labels horizontal bars with their width, placed past
        the end of the bar. Any other value draws nothing.
    xspace, yspace : float
        Offsets added to the label position. Only used when ``h_v == "h"``.
    unit : str
        Suffix appended to the value, which is rounded to two decimals.

    Returns
    -------
    None
    """
    # Treat a single axes and an array of axes the same way: as a flat list.
    if isinstance(axs, np.ndarray):
        targets = [axis for _, axis in np.ndenumerate(axs)]
    else:
        targets = [axs]

    for axis in targets:
        if h_v == "v":
            for bar in axis.patches:
                label = str(round(float(bar.get_height()), 2)) + unit
                x_pos = bar.get_x() + bar.get_width() / 2
                y_pos = bar.get_y() + bar.get_height()
                axis.text(x_pos, y_pos, label, ha="center")
        elif h_v == "h":
            for bar in axis.patches:
                label = str(round(float(bar.get_width()), 2)) + unit
                x_pos = bar.get_x() + bar.get_width() + float(xspace)
                y_pos = bar.get_y() + bar.get_height() + float(yspace)
                axis.text(x_pos, y_pos, label, ha="left")
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "import seaborn as sns\n", + "\n", + "# Mean POLL PERCENTAGE of all the STATES\n", + "polls = electors_2009.groupby('STATE')['POLL PERCENTAGE'].mean().sort_values(ascending=False)\n", + "# Generating a bar plot\n", + "plt.figure(figsize=(6,20))\n", + "sns_t = sns.barplot(polls,polls.index)\n", + "show_values_on_bars(sns_t, \"h\", -10.2,-0.3 ,'%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insigt: Top 3 (out of top 5) are Northeastern states. UP is voting less than 50%, still their regional parties BSP and SP are among top-six most voted parties, imagine what would happen if more no. of people in UP participate in elections." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 5 : Plot a bar plot to compare the seats won by different parties in Uttar Pradesh" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Find winners in UP and count the party affiliation\n", + "ax = candidate_2009[ (candidate_2009.Position == 1 ) & ( candidate_2009.State_name == 'Uttar Pradesh') ].\\\n", + " Party_Abbreviation.value_counts().plot.barh(title='Winning Seats distributions by party in UP')\n", + "\n", + "# to show in descending order\n", + "ax.invert_yaxis()\n", + "\n", + "plt.xlabel('No. of seats')\n", + "plt.ylabel('Party')\n", + "\n", + "# [optional] to save the figure to be included in external reports\n", + "plt.savefig('up.png')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insight: Regional parties have major influencies, the highest number of seats won in UP are by Samajwadi Party(SP). Also Bahujan Samaj Party(BSP) is trailing behind INC only by a few seats." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 6 : Plot a stacked bar chart to compare the number of seats won by different `Alliances` in Gujarat, Madhya Pradesh and Maharashtra. " + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Subset the the dataset for the states of Gujarat, Maharashtra and Madhya Pradesh\n", + "states_list = ['Gujarat', 'Madhya Pradesh', 'Maharashtra']\n", + "states = candidate_2009[candidate_2009.State_name.isin(states_list)][candidate_2009.Position ==1]\n", + "\n", + "# Stacked bar plot\n", + "states.groupby(['State_name', 
'Alliance']).size().unstack().\\\n", + " plot.bar(stacked=True,figsize=(10,10),rot=0, \n", + " title='Alliance-wise seat distributions in three states')\n", + "plt.xlabel('States')\n", + "plt.ylabel('No. of seats')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insight: Maharashtra is a dynamic state, with various parties winning seats in the election; UPA won the most seats, followed by NDA." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 7 : Plot a grouped bar chart to compare the number of winning candidates on the basis of their caste in the states of Andhra Pradesh, Kerala, Tamil Nadu and Karnataka" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Subset the data with the winner of each constituency of the mentioned states\n", + "states_list = ['Andhra Pradesh', 'Kerala', 'Tamil Nadu', 'Karnataka']\n", + "states = candidate_2009[candidate_2009.State_name.isin(states_list)][candidate_2009.Position ==1]\n", + "\n", + "# Plotting the grouped bar\n", + "states.groupby(['Alliance', 'Candidate_Category']).size().unstack().\\\n", + " plot.bar(figsize=(8,8),rot=0, title =\"2009 Winning Category\")\n", + "plt.xlabel(\"Party-wise Candidate Category\", fontsize=12)\n", + "plt.ylabel(\"No. of seats\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insight: Most of the winner candidates are from general category with UPA having the highest number of SC candidates." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# But if we remove GEN category and only focus on SC, ST we might see a different picture\n", + "\n", + "# Plotting the grouped bar\n", + "states[states.Candidate_Category!='GEN'].\\\n", + " groupby(['Alliance', 'Candidate_Category']).size().unstack().\\\n", + " plot.bar(figsize=(8,8),rot=0, title =\"2009 Winning Category excl. 
GEN\",\n", + " color=['tab:orange','tab:green'])\n", + "plt.xlabel(\"Party-wise Candidate Category\", fontsize=12)\n", + "plt.ylabel(\"No. of seats\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insight: SC and ST are under-represented in the lower house of the parliament." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 8 : Plot a horizontal bar graph of the Parliamentary constituency with total voters less than 100000" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Constituency with less than 100000 voters\n", + "# Plot a horizontal bar graph to compare constituencies with less than 100000 voters\n", + "electors_2009[electors_2009.Total_Electors < 100000].\\\n", + " sort_values(ascending=True,by='Total_Electors').\\\n", + " plot.barh(x='PARLIAMENTARY CONSTITUENCY',y='Total_Electors',\n", + " figsize=(10,10), \n", + " title=\"Parliamentary constituencies with less than 1 lakh total voters\")\n", + "\n", + "plt.xlabel('Number of Voters')\n", + "plt.ylabel('Parliamentary Constituencies')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insight: Only two 
constituencies have an electorate of fewer than one lakh (100,000) voters." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 9: Plot a pie chart with the top 10 parties with majority seats in the elections" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAi0AAAI+CAYAAABwqzpHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdd3hUVf4/8PeZmsqEhBoSuPQUIDRRQYqCyG5QUb7YRXFtPwXBPrKKsSxGRbGg4OrKCthBLAw2QEGaCkIAIfQokIQaQiAhZeb8/rgTNoY0YGbOlPfreeaR3Dn33vckceaTc849V0gpQUREROTvDKoDEBERETUEixYiIiIKCCxaiIiIKCCwaCEiIqKAwKKFiIiIAgKLFiIiIgoILFqIgpQQYqcQ4kIF57UKIY4LIeLraJMvhLjIl7nc5+0khDh6Dvs/JYSY5slMRNRwLFoooLk/HCsfLiFESZWvb/TwuW4UQqxyn+ObGp4/TwixXghRLIT4RQjRxZPnryfbR0KIx6tuk1K2l1Ku8lWGKuctlVJGSSlza8umipRym5Qy5hz2f1JKORYAhBBJQogKz6U7d/6YiciTWLRQQHN/OEZJKaMA/Ang8irb3vfw6Q4DeAnAy9WfEEKEA/gCwL8BNAbwKYD5QgiThzOcRghh9PY5CPDFz5KI6saihYKaECJcCPGGECJPCLFXCPGiEMLsfm6YEGKHu8v/iBBilxBiVG3HklJ+I6WcCyCvhqcvBXBSSvmmlLIUenETDaDGIRAhxGohxDNCiLVCiEIhxDwhhM39nMn99X4hxFEhxA9CiM5V9v1ICPGaEOI7IcQJAP8EMBLAE+4epk/d7U4NwQghjEKIJ9yv8ZAQ4n0hRIz7uUj3MY+4z/ezEKJxDZn/X+Wx3V/vEULMqvL1Afdf+mFCCCmESBBC3FdTNrfzhBCb3K//fSGEpZbv1d1CiCVCiGnuttuFEL2FEHcKIfa5v0/XVWl/lRAiSwhxTAjxhxBiYpXn/tITIYRoLYRY6H7t24QQt1R5LlMI8YEQ4mMhRBGA69zb3nE3WQbAWKVnr5/7nB2rHCPB3fN2Wu+OO8ty92s6WO172cX9mguEEFuEECMa8vpqyNSjrvMQBRoWLRTsngLQDUBXAL0ADALwSJXnNQAWAC0A3AngPSFE27M4TyqArMovpJQuAJvc22szGsCNAFq5M7xU5bkvAbR358oG8F61fW8C8AT0wuh5APMAPOPuYaqp8HoYwFDoRVQCgHIAU93P3Q7A5M7RBMBYAGU1HGMpgIEAIIRoB6ACwAD31yn6y5bZVXeQUr5WR7b/AzAYQAcA5wO4oYZzVuoPYCWAOACfu4+ZDKAtgDsATBdChLnbHnMfKwbAVQAeEkIMq+W4nwLYCqCle5+pQoh+VZ4fCf17b3Ofs6oBAJxVevZWAJgL/WdT6UYADillTfNonnO/lhgArQG8BQBCiEYAvgfwH+g/j9EA3hVCdGjA66ueaV1t5yEKRCxaKNjdCOBJKeUhKeV+AM8CuLnK8xUAnpJSlkkpFwFYBP3D9ExFASistq0QelFRm5lSymwp5XEATwK4HgCklBVSyveklMellCehF159qnwoA8BcKeXPUkqXu2enPncBsEspc6sc81ohhIBewDQF0N597l+llCeqH0BKuRk4VaAMAPAVgGNCCA16MbO0ATmqmiql3C+lPAhgIYDudbTNllJ+IKWsAPAJ9A/fDPfP7UvoRZ/mzrlYSvm7+3vzm7v9wOoHdPeIpAGY6J6HswZ6gVL192OplHKh+1glDXhN70H/nat0E4DZtbQtd2duIaUscRc9gF6IbJJSvi+ldEopf4X+vR55Jq+vAechCjgsWihouT+QWwD4o8rmP6D3KFQ66P4Qr/p8rVe91OE4gEbVtjUCUFTHPnuqnTdCCGFzDw9NcQ/lHIPe0yKg9zLUtG+d3N+HRAAL3cM/RwGsg/7/fxz0v+iXApgr9CG
0yaL2eTLLoPdWDXDv8yP0D8yzKVryq/y7GHrhV5v9Vf5dAqBUSllYbVsUALiHaZa6h0IKAdwKvceiunjoP/+qxUj1348Gf5/dKodnLhRCdIfeg/N1LW3vBxABYJ0QYoMQorKHpg2AAZU/K/fPa6T7WGfy+uo7D1HAYdFCQUvqtzDPh/4hUKk1gH1Vvm5SrQejNYDcszjd79D/agcACCEMALq4t9cmsdp5i90fxGOgD+VcDH1YIqnysFXaV789e623a3d/H/YBuERKGVPlEebugSqVUk6SUiZBL0ZGAbiulsMthV609If+AV05ZFRZxNQYobZsXvIJgI8BJEopbQD+i79+7yrlAmgq9EnUlar/ftSV/bTn3N/rWdB7WG4G8JGUsrzGnaXcJ6W8DXoxch/0IaDW0Aul76r9rKKklBMa8PpqylTbeYgCDosWCnYfAnhSCBEnhGgGfdLqnCrPm6FPErUIIS6BPqG2+twFAKcms4ZBn/9hEPqE08orSr4HEO6eNGqF/tftCQDL68h2q9DXDYkCkAH9gwjQh5ROQr9aKRL6kFZ99gNoV8fzMwBkCiES3a+lmRDicve/hwghUtyF1jHoQ2bOWo6zFMAwAGXuYZ2l0HsBLKi9QKsvm8e4e5WiAByWUp4UQvSFXoTVZAeADQCeFfraMj0B3AKgoVedHYDeq1K9AJgF4Brow321TnoVQlwrhIh3FzqVc14qoM8/6eF+3uz+3bzA/btS3+s7LVMd5yEKOCxaKNhNArAZ+gfqegArALxQ5fkc6G/g+QDeBTBGSrmrlmPdAX0YYir04qYEwDQAcA8xXAngbugfDNcBGOGeg1Gb2dCLqn0AXAAedG//D4CD7kwbUXfhU+nf0K/GOSqE+KiG51+APl9niftKmJUAerqfawX9cu0i6JOHF0L/a74mG6HPkVgGAFLKQ9B7Bn5yfyieTTaPcWe4G8AU9+t8BPpk29raXgMgBfr3+mMAD0spf2rguQqgf1/Xul9bd/f2ndAn9xZJKX+p4xAXuvc97s54p3vOUQGAy6D3uOVB7xF6FoC5vtdXS6Yaz9OQ10jkb0Tt7zNEwc19xcU0KWWHeht7/tyr3eeeU29j8hj3JOI1UsoIL5/nAwCbpZQN6SUjogZiTwsRhZIuAHZ78wTuS5OHA5jpzfMQhSIWLUQUEtyLsL0CYGJ9bc/hHC9AvzLraSnlvvraE9GZ4fAQERERBQT2tBAREVFAYNFCREREAYFFCxEREQUEFi1EREQUEFi0EBERUUBg0UJEREQBgUULERERBQRT/U2IiIjoXK1du7aZyWR6B/rKzOw00O+5tqmiouL2Xr16HWjIDixaiIiIfMBkMr3TokWL5KZNmxYYDIaQX9nV5XKJgwcPpuTn578D4IqG7MNKj4iIyDe6NG3a9BgLFp3BYJBNmzYthN7z1LB9vJiHiIiI/sfAguWv3N+PBtciLFqIiIhCRERERA8A2Lp1q0UI0etf//pXs8rnRo8e3fq1116Lq/x60qRJzdu2bZvasWPH1M6dO6dMmzYtrqZj+hLntBARESmg2R29PHm8nMz0tWfSPjY2tuKtt95q9uCDDx4MCwv7Sw/QCy+80HTJkiWN1q5duyU2NtZ1+PBh4wcffBDjybxngz0tREREISg2NrbioosuKnrjjTdO60GZOnVqi7feeuvP2NhYFwDExcU5x40bd9j3Kf+KRQsREVGImjRpUt60adOaV1RUnNpWUFBgOHHihDE1NbVUYbQasWghIiIKUUlJSWXdu3c/8dZbb8VWbpNSQgihMlatWLQQERGFsEmTJuW/8sorLV0uFwAgNjbWFR4e7tq8ebNFcbTTsGghIiIKYT169DjZsWPHksWLF9sqt02YMCHv7rvvbnPkyBEDABw5csQwZcqUJupS6nj1EBERUYh74okn8vr165dS+fUjjzxy8Pjx44aePXummM1maTKZ5Lhx4/JVZgQAISXXuSEiIvK2rKysnLS0tEOqc/i
brKysJmlpaVpD2nJ4iIiIiAICixYiIiIKCCxaiIiIKCCwaCEiIqKAwKKFiIiIAgKLFiIiIgoILFqIiIhChNFo7JWUlJTSuXPnlJSUlOTvv/8+EgC2bt1q6dixYyoALFiwIDo6Orp7cnJySrt27VIffPDBlmpT/w8XlyMiIlIhw9bLs8crXFtfE6vV6srOzt4MAPPmzWs0ceLEhEsvvXRr9Xa9e/c+/sMPP+w4duyYoWvXrikjRowo7N+/f7FH854F9rQQERGFoMLCQqPNZquoq02jRo1cXbt2Ld66davVV7nqwp4WIiKiEFFaWmpISkpKKS0tFYcOHTIvXLhwW13t8/PzjevWrYvMyMjI9VXGurBoISIiChFVh4cWLVoUOWbMmLbbtm37vXq7NWvWRCUnJ6cYDAY5fvz4/N69e5/0fdrTsWghIiIKQUOGDDlRUFBgysvLO60WqJzToiJXXTinhYiIKAStW7cuzOVyoXnz5nXOa/En7GkhIiIKEZVzWgBASonp06fnmEwmlJeXC4vF4lKdrz4sWoiIiFRowCXKnuZ0Oms85/r168PbtGlTCgDDhw8vGj58eJFvkzUMixYiIqIQNmHChPivv/465t13392tOkt9hJRSdQYiIqKgl5WVlZOWlnZIdQ5/k5WV1SQtLU1rSFtOxCUiIqKAwKKFiIiIAgKLFiIiIgoILFqIiIgoILBoISIiCiGPPvpoiw4dOqR26tQpJSkpKWXJkiWRffr06axpWpfOnTun9OzZMykrK8svbpBYHS95JiIiUqDre117efJ4G2/ZWO+6L4sWLYr89ttvYzZu3Lg5PDxc5uXlmUpLSwUAzJo1a9eAAQOKp0yZ0uT+++9PXLJkCZfxJyIiIjX27dtnjo2NrQgPD5cA0LJlywpN08qrthk8ePDxP/74wy97Wli0EBERhYgRI0Ycy83NtWia1uWmm25q7XA4oqq3+eyzz2xJSUklKvLVh8NDREREIcJms7k2bdq0+ZtvvolevHhx9C233NJ+0qRJewFg9OjR7cLCwlwJCQmlM2bM+FN11pqwaCEiClBCiONSyighhAZgN4D7pJSvu5+bBmCNlPK/7q8fAnA7gAoATgAvSSlnqchNaplMplP3F+rWrVvJ7Nmz44D/zWlRna8uHB4iIgoOBwCMF0JYqj8hhLgbwKUA+kgpuwAYAED4OB/5gaysLOvGjRtPzVdZt25deEJCQpnKTGeCRQsRUXA4CGAxgFtqeG4igHuklMcAQEpZKKV8z5fhyD8cO3bMOHr06Lbt27dP7dSpU0p2dnb4888/n6s6V0NxeIiIKHhkAvhaCPFu5QYhRDSAaCnlTnWxqCYNuUTZ0/r371+8bt267Orbf/nll62+znI22NNCRBQkpJS7AfwC4IYqmwUAqSYRkWexaCEiCi6TATwK9/u7e0johBCindJURB7AooWIKIhIKbMBbAYwvMrm5wC8IYRoBABCiEZCiDtV5CM6F5zTQkQUfP4FYF2Vr6cDiALwqxCiHEA5gJdUBCM6FyxaiIgClJQyyv3fHABdqmzPQpWedCmlBPCC+0EUsDg8RERERAGBPS1EVC/N7jACiAUQV+URgb8uUFb9CpX6vi4DcATAYQCHABzOyUyv8FRmIjqd0Wjs1bFjxxIpJYxGo3z11Vf/vPTSS084nU784x//SFyxYkUjIYS0WCxy7ty5O5OSkspatWrVNTIy0imEQJMmTco//PDD3a1bt1by/yqLFqIQpNkdzQG0wl+LkKqP6gWKDT5YQVWzO47BXcCgSjFTy7YDOZnped7OROQtW5KSe3nyeMnZW+pd98Vqtbqys7M3A8C8efMaTZw4MeHSSy/d+s4778Tm5+ebs7Ozfzcajdi5c6e5UaNGrsr9li5duq1ly5YVY8eObTVp0qSW//3vf/d4MntDsWghClKa3REFoJP70bnKvztCL0L8USP3o0GX52p2x3EA2wFsrfLYBmBrTmb6cW+FJAoGhYWFRpv
NVgEAeXl55ubNm5cbjUYAQPv27ctr2mfQoEFFr7/+ejMfxvwLFi1EAUyzO0zQP+CrFyadAbRUGM1XogD0cD/+QrM7cuEuYFClmAGwOycz3enLkET+orS01JCUlJRSWloqDh06ZF64cOE2ALj55puPDBgwICkpKSm6f//+x2699dbD/fr1K6m+/5dffhmTkpJy2nZfYdFCFCDcPSfnAejjfnQF0Bb8/7g28e7HoGrbyzS7Yxf0tUxWA1gJYE1OZnqpb+MR+V7V4aFFixZFjhkzpu22bdt+b9++ffmOHTs2ffXVV9GLFy9u9Pe//73zrFmzdl555ZVFADBw4MBOBoMBycnJxS+//PI+Vfn5Zkfkh9wTX1MBnF/lkQJe8ecJFgBJ7sfV7m2lmt3xG4AV0IuYFTmZ6QcU5SPyiSFDhpwoKCgw5eXlmVq1alURHh4ur7nmmmPXXHPNsebNm5d/9tlnMZVFS+WcFtWZWbQQ+QHN7rBC7z3pD2AAgL4AopWGCi1WABe6HwAAze7Yif8VMSsB/J6Tme6qeXeiwLNu3bowl8uF5s2bVyxfvjwiISGhXNO0cqfTiY0bN4Z37dpV2TBQbVi0ECmg2R2RAPpBL1D6Qy9YwpSGouraux+j3V8XanbHavyvkFmdk5l+QlU4orNROacFAKSUmD59eo7JZEJ+fr7prrvualNWVmYAgO7du5+w2+1+19so9IUSicjbNLujBYDLAVwJYDBYpAS6UgCLAXwB4MuczPR8xXnIz2VlZeWkpaUdUp3D32RlZTVJS0vTGtKWPS1EXqTZHSnQi5QrofemeH2tE/IZK4C/ux8zNLvjF+gFzBc5memblSYjClIsWog8yD2Bth+AK6AXKh3UJiIfEfjfhOnJmt2xA+4CBvqkXs6FIfIAFi1E58g9P2Uo9CIlHUATtYnID3QA8KD7cUizOxZAL2C+y8lML1aajCiAsWghOgua3WEDMArACHB+CtWtCYBb3Y8Sze5YBL2A+SwnM71AYS6igMOihegMaHZHfwB3APg/AOGK41DgCYc+GftyAG9odsfnAGYC+J5DSET1Y9FCVA/N7mgK4BYAt0NfHp/IE6wArnU/9mp2xywAM3My03eojUXkv7i6JlENNLvDoNkdl2l2x1wA+wC8CBYs5D0JACYC2K7ZHT9pdscYze6IUB2KglNERESPrVu3WsLCwnomJyentGvXLrVr167Jr7/+elzVdrNnz4556KGHWgLAyJEjtZkzZzaufhwAyM3NNfXv37+jL7Kzp4WoCs3uSARwG4AxANoojkOh6SL342XN7pgNYAYvoQ5Ob9y9pJcnj3fvjEvWnkn7xMTE0i1btmwGgM2bN1uuvvrqDi6XC+PHjz8MAC+//HKLhQsX1tvzFx8fX9G8efPy7777LnLo0KFeXXCRPS0U8jS7w6TZHVdpdsdCADkAMsCChdSLATAOwO+a3bFMszuu1+wOi+pQFJxSUlLKXnjhhT0zZsxoDgAbNmywWiwWV0PvNzRixIijs2bNiqu/5blhTwuFLM3uSAAwFvpVHc3VpiGqU3/346Bmd8wE8EZOZvqfijNRkOnbt2/x7t27wwDghx9+iOrWrVuDL8/v16/fiaeffjree+l0LFoo5Gh2R3sAduj3lOFfrhRImgJ4BMD9mt0xB8BzOZnp2xVnoiBR9bY+eXl55qZNm57qZRHi9MW8q26Lj4+vOHDggNffT1m0UMjQ7I5UAI8BuA6AUXEconNhhj7varRmd3wCYHJOZvomxZkowK1atSqiXbt2JQAQHh7uKiwsPFUjxMbGVhw5cuTU++b+/fuNMTExp4qa4uJiYbVavX7ZPue0UNDT7I5emt3xGYCNAG4ECxYKHkYA1wPYoNkd8zW7o7fqQBSYtm7darHb7Ql33XXXAQBITU09uXPnTmvl8xdffHHRvHnzYk+ePCkAYPr06U369u1bVPn8pk2bwjp16lTi7ZzsaaGg5V4I7p8ALlOdhcjLBPTVmUd
odse3AJ7NyUxfrjgT+aHy8nJYLBYJAHv27LEmJyenlJaWisjISNddd911oPLKocsuu+y43W5PdLlcMBgMuP766wvXrFkT0a1bt2SDwYA2bdqUzpw584/K437//ffRw4YNK/R2flF1DIsoGGh2x2XQi5X+qrMQKbQMevHyveogpMvKyspJS0s7pDLDqlWrwu+8805t48aNW+prO2bMmMQrr7zy6IgRI4rqa9u7d+/OX3/99Y6mTZs6zzRTVlZWk7S0NK0hbdnTQkFBszsq/9KcCIBd5ETAAADfaXbHLwD+BeCrnMx0/pUawl544YWmb731VrMXX3xxT0PaP/3003nLli2LrK9dbm6uafz48fvPpmA5U+xpoYCn2R3XAngCQKrqLER+bAP04uVTFi9q+ENPiz86k54WTsSlgKXZHb01u2MFgI/AgoWoPt0AfAxglWZ3nK86DNHZ4PAQBRzN7mgB4DnoNzE8ffEAIqrL+dALlzkAHs3JTM9THYiooVi0UMDQ7A4rgPuhz1uJVhyHKJAJADcDuEqzO54D8FJOZnqp4kxE9eLwEAUEze64GsBm6D0sLFiIPCMK+jyXLZrdcZXqMET1YdFCfk2zO7pqdsdiAPMAtFOdhyhItQXwmWZ3LNLsji6qw5B3zZo1K0YI0WvdunVhgL6wXMeOHVPnzZvXKCkpKSUpKSklIiKih6ZpXZKSklKuuuoqDQB++OGHiN69e3fWNK1L27ZtU6+99to2RUVFPq0jODxEfkmzO5oAeAbAHeAKtkS+MhjAes3ueAvAEzmZ6UdUBwpmL107vJcnj/fgxwvWNqTdRx99FNuzZ8/js2fPju3Ro0du5faRI0ceGzly5GYA6NOnT+cpU6bsGTBgQDEA7Nmzx3TjjTe2nzVr1q4hQ4accLlceO+99xofPXrUEB0d7fXl+yuxp4X8imZ3mDW7YwKA7QDuBgsWIl8zArgHwHbN7hin2R384zaIFBYWGtasWRM1c+bMnPnz5zdu6H4vvfRSs2uuuebwkCFDTgCAwWDAmDFjChITEyvq29eTWLSQ39DsjqHQ15KYCiBGcRyiUBcL4DXoPS9DVIchz3j//fdjBg0aVNitW7fSmJgY5/LlyyMast/mzZvDe/fuXeztfPVh0ULKaXZHtGZ3vA3gWwBJqvMQ0V+kAvhesztmanZHI9Vh6Nx88sknsddff30BAIwcOfLI7NmzY1VnOhMsWkgpze64GPrdl29XnYWI6nQrgI2a3XGJ6iB0dvLz842rV69udO+997Zp1apV12nTprX48ssvGzdkZfzk5OSSNWvWNKhXxptYtJASmt0RrtkdrwJYDKCN6jxE1CCtASzS7I7XNbtD+QcYnZnZs2c3vvrqqw/n5uZu3Ldv38b8/PwNCQkJZTk5OZb69n3ooYcOfPLJJ3FLliw5dS+iN998M/bPP//06ZwnFi3kc5rdcQGA9QDuA1e0JQo0AsBY6HNdLlAdhhru008/jbv66qsLqm678sorCyZPntyyvn0TExMrZs2atevhhx9O0DStS7t27VKXL18e3bhxY59dOQTwhonkQ5rdYQHwFICHwauCiIKBE8CLAJ7MyUwvUx3G3/GGiTXjDRPJ72h2R3cAawDYwYKFKFgYof8//atmd6SpDkPBj9ffk1e513h4DMATAMyK4xCRd3QD8ItmdzwF4PmczHSn6kAUnNjTQl6j2R3JAFYCeBosWIiCnQX6fYxWaHZHJ9VhKDixp4U8TrM7BPS7Mf8LQJjiOETkW+dDn6RrB/B6TmY6J06Sx7CnhTzKvfjUFwBeAgsWolAVDuBVAAs0u4OrW5PHsGghj3EPB/0K4HLVWYjIL/wd+iTdVNVBKDiwaCGP0OyOqwH8DIBj2URUVQcAqzW7Y6TqIAQYjcZeSUlJKR06dEjt3LlzSkZGRnOnU583vWDBgmghRK+pU6c2qWy/YsWKcCFEr0mTJjUHgJEjR2ozZ85sDAD79+83Jicnp7z66qtxvsr
POS10TjS7wwDgWeiXPXKhOCKqSRSAuZrd8RyAx3My0326IJm/2mv/qZcnj5eQ2X9tfW2sVqsrOzt7MwDs27fPNGrUqHaFhYXGqVOn5gJAx44dS+bOndv4/vvvPwQAc+bMie3cuXNJ9eMcPnzYOHjw4E633nrrwfHjxx/25OuoC3ta6KxpdkdjAA7olzSzYCGi+jwGwOF+7yDFWrVqVfHOO+/kzJw5s5nL5arcVlZaWmrYs2ePyeVyYcmSJbbBgwcXVt2vqKjIMGTIkI6jRo06/Oijjx70ZWYWLXRWNLujG/TF4oapzkJEAWUY9HkuXVQHISAlJaXM5XJh3759p0ZeRowYUTB79uzGixYtiuzatWux1Wr9yxVgTzzxROL5559//Mknnzzg67wsWuiMaXbHdQBWAWinOgsRBaT20Oe5jFIdhIDqt/MZPXr0kfnz58fOmTMn7oYbbjhSvf2FF1547JtvvompWuj4CosWajDN7jBqdscUAB8C4B1eiehcRAL4RLM7nnfPjSMFNm/ebDEajWjVqlVF5bbWrVtXmM1muWzZskZXXHHFser7XHvttQW33Xbbgcsuu6xjQUGBT392/EWhBtHsjiYAvgXwoOosRBRUHgHwtWZ3xKoOEmpyc3NNd9xxR5sxY8YcMBj+Wg489dRT+5555pm9JlPNnSmTJk060L9//2Pp6ekdTp486bM5jSxaqF6a3dED+vyVwaqzEFFQGgp9nks31UGCXWlpqaHykueLL7640+DBg49NmTIlt3q7Sy+99MTNN998tK5jTZ8+fV98fHzZyJEj21ZeNu1tovpYFlFVmt0xFMBn0LtyiYi8qRjA/+Vkpn+tOog3ZGVl5aSlpR1SncPfZGVlNUlLS9Ma0pY9LVQrze64BsBXYMFCRL4RAeAL92R/otOwaKEaaXbHXdAn3FpUZyGikGIG8L5md9ytOgj5HxYtdBrN7pgIYAb4+0FEahgATNfsjn+qDkL+hcv40yma3SEATAHwgOosREQAnnVfVfRQTmY6J2AS/5ImnWZ3GAG8CxYsRORfHgDwH/d7FIU4Fi0Eze6wApgL4FbFUYiIajIGwKfu9yoKYSxaQpxmd0QD+BrACNVZiIjqcMvrt0MAACAASURBVBX0my1GqQ4S6P7880/T8OHD2yUmJnZp37596sCBAzts2LDBGhYW1jMpKSmlffv2qTfccENrp9OJrVu3WoQQvcaPHx9fuX9eXp7JZDL1HD16dGtfZ+eclhDmXuX2awC9VWchImqAwQAWa3bH33My0w+rDnOuMjIyenn4eGvra+NyuXDFFVd0uOGGGw4vWLBgFwCsXLkyPDc315yYmFianZ29uby8HBdeeGHnOXPmxFxwwQXFCQkJpd99910MgFwAmDVrVuMOHTqc9GT2hmJPS4jS7I5EAMvBgoWIAksfAMs0u6OV6iCBaMGCBdEmk0k+8sgjByu39e3bt6Rt27ZllV+bzWb06dPn+Pbt260AEBYWJjt06FCybNmyCACYN29e7IgRI067kaIvsGgJQZrd0RnACgCdVWchIjoLKQCWa3ZHB9VBAs2GDRvC09LSiutqU1RUZFi2bFmjbt26lVRuu+66647MmTMndufOnWaj0Sjj4+PLvZ/2dCxaQoxmd3QCsBRAouosRETnQAPwEwsXz9mzZ481KSkppU+fPklDhw4tvOaaa07d4XnkyJHHli5d2ui///1v7MiRI5X0sgAsWkKKZndoABYDaK44ChGRJ7QA8D2Hihqua9euJVlZWRE1PVc5p2XLli2bX3755b/cRDEsLEx269atePr06S1uuummAt+kPR2LlhCh2R3xABYBSFCdhYjIgzQA32l2R5zqIIHg8ssvLyorKxMvvfRSk8ptS5cujdixY0e9t2x59NFH85988sm9LVq08M0tnWvAoiUEuK8SWgSgveosRERekAJgIS+Hrp/BYMCXX365c/HixY0SExO7dOjQIfXJJ5+Mb926db1zVHr37n1
y3LhxSq/aElJyZeRgptkdMQCWAOihOgsRkZctBpCek5leqjpITbKysnLS0tIOqc7hb7KyspqkpaVpDWnLnpYg1tv+fgSAhWDBQkShYTCAD7nkf/Bi0RKsMmyWX633zH/I9HGF6ihERD50FYC33TeApSDDoiUYZdgMAOYIgaFjTV/0f8H01lLVkYiIfGgM9DvWU5Bh0RKc3gAwqvKLa0xLB840P8/ChYhCyQOa3fFP1SHIs1i0BJsM2yQAd1fffLExa+AXlsd/EnC5FKQiIlLhWc3uOO39kAIXi5ZgkmG7AcBTtT2dZtjVf7HloZ/NqCirrQ0RUZB5Q7M7rlMdgjyDRUuwyLBdBODd+pq1M+RfuMJ634ZwlNZ57wkioiBhADBLszv+pjqIPzAajb2SkpJSOnfunJKSkpL8/fffRwLA1q1bLR07dkwF9JsqRkdHd09OTk7RNK1L7969O3/44Yc2tcl1JtUByAMybO0BzAdgbUjzZuJo79XWezf2L32l9TFE+cUvIhGRF5kBzNXsjr45melZqsNUWrykfS9PHm/wJTvX1tfGarW6srOzNwPAvHnzGk2cODHh0ksv3Vq9Xe/evY//8MMPOwBg5cqV4aNGjeoQERGRc+WVVxZ5MvOZYk9LoMuwNQbgANCkvqZV2URx15+tY/c3x5ED3glGRORXIgDM1+yOWNVB/EVhYaHRZrPVuyxG3759Sx5++OHcadOmNfNFrrqwaAlkGTYzgHkAOp/N7uGirNNy6/gSTeTt8WwwIiK/1BbAR6G8+FxpaakhKSkppW3btqnjx49v8+STT+Y1ZL8+ffoU79y5M8zb+erDoiWwvQXg4nM5gFk42yy2PGROFbt3eCgTEZE/uxTAZNUhVKkcHtq9e/fv8+fP3z5mzJi2rgZcVOovt/xh0RKoMmwPQ19A6ZwZhWzxleWfcRcYfv/dE8cjIvJzj2h2x6j6mwW3IUOGnCgoKDDl5eXVO7/1119/jejQocNJX+SqC4uWQJRhuwTAc548pEGg8Yfmf7UZZvj5N08el4jIT83U7I4uqkOotG7dujCXy4XmzZvXOa/l559/Dn/xxRfj7733XuVzIHn1UKDJsCUC+AiAx8dkhUDUdPOrqf+suG31B84hF3j6+EREfiQSwOea3XFeTmZ6geowvlI5pwXQh3ymT5+eYzKZUF5eLiwWy6lxojVr1kQlJyenlJSUGOLi4spffPHFP1VfOQSwaAksGTYLgLkAmnrrFELA+i/Tu+c1ReHyV50jL/LWeYiI/EB7AB9odkd6Tma6z1cLb8glyp7mdDprPOf69evD27RpUwoAw4cPLyoqKlrv22QNw6IlsLwGoI+3TyIEjBNM8/o1EYVLn6i4baC3z0dEpNAwAM8CmKg6iCoTJkyI//rrr2Pefffd3aqz1Ef4y4xgqkeGbQwasOKtp33r7LX0rvIHWbgQUbD7v5zM9HnePEFWVlZOWlraIW+eIxBlZWU1SUtL0xrSlhNxA0GGrSeAN1Wc+jLj2oFzLRnLAFa3RBTU/qvZHSmqQ1DdWLT4uwxbDPQF5JQt6tPbsG3A95aHVxrhrHflRCKiABUFfWIub23ix1i0+L9/A9BUh+hoyO233Dp+XRhKS1RnISLyko4A3tfsDqE6CNWMRYs/y7DdBsBvFkBqKY6ct9o6dls0ThSqzkJE5CXpAO5XHYJqxqLFX2XYOkG/WsivxIgTaT9bx+Y1wdGDqrMQEXnJZM3u6Ko6hLfs3LnTPHjw4PZt2rTpkpiY2GXMmDGJJ0+eFCtXrgz/+OOPTw2PPfDAA/GTJk1qrjJrdbzk2R/pN0J8H/riR34nQpQmrbTet3tw2ZSyPbJZK9V5iIg8zApgjmZ39MnJTC/11kla/LC+lyePl39x93rXfXG5XBgxYkSH22+//cD48eN3VlRU4IYbbmgzfvz4VqmpqSVr1qyJvPbaaz3Sm15RUQGTybNlBosW//QsgN6qQ9TFIira/mB5IHd42eRd2bJ
1O9V5iBpKVpQh/4NHISvKAZcLEZ37Iab/jSjJWY+jP86ElC4YzOGIS58Ac+N4HFv7FY6v/xrGRk3R7OrHIYxmnNz7O4q3rkTs4DtUvxzynm7Q34sfVh3Ek7766qtoq9XqGj9+/GEAMJlMmDFjxh5N07qZTCYppURSUlLUgw8+mAcAW7ZsCe/Tp0/n3Nxcy913373/8ccfPwAAb775Zuz06dObl5eXi549e56YNWvWHyaTCRERET3uvPPO/UuWLGn04osv7v3iiy9s3377bYzRaJSDBg069u9//3vvueTn8JC/0e8rFBD/k5iEK36h5THbeSJ7i+osRA1mNKP5dZMRf9s0tBzzGkp2r0Xpvmwc+e5NNBn+EOLHvI7IlIEoXPkxAOB41rdoeds0WJq3R8nu3yClROGKj2Drd73iF0I+8IBmd1ysOoQnbdy4MTwtLa246rbY2FhXq1atyh544IG8yy+/vCA7O3vzHXfcUQAAO3bsCFu6dOm2X3/9dcuUKVPiS0tLxW+//RY2d+7c2DVr1mRnZ2dvNhgMcsaMGXEAUFJSYujSpUvJhg0bsrt3716ycOHCxtu3b/9927ZtmydPnpx3rvlZtPiTDFscgFkAAmbmukHIuE8sTycMNqz1yyWfiaoTQsBgCQcASFcF4HICQgBCwFWmv5e7Sk/AGBX7v52cTsjyUgiDCSd+X4Lw9r1hDItSEZ98ywDgPc3uiFEdxFOklBBCnLbulnv7ae2HDh16NDw8XLZs2bIiNja2fO/evaZvvvkmetOmTRFpaWnJSUlJKcuXL2+0a9cuKwAYjUbceuutBQAQGxvrtFqtruuuu67Ne++9FxMVFXXOt0pg0eJfpgEIuDkiQiD6HfNLSaOMP/6iOgtRQ0iXE7kzx2Hv6zchTOsOa3xnxA0bhwOfZmDvG7fgxO8/wHaBfuFeoz5XI2/2g3AVF8LaKhknNi1GdI90xa+AfCgR+ntzUOjatWvJ+vXr/zJf8siRI4b8/HyL0Wg8rZixWq2nthmNRlRUVAgppRg1atTh7OzszdnZ2ZtzcnI2vfzyy7kAYLFYXJXzWMxmM9avX79l5MiRRz///POYQYMGdTzX/Cxa/EWGbQSA61THOFtCIOwF07973m38coXqLET1EQYj4se8joR7/ovSvG0oO5iDY2u+QLNRGUi49z1Edh2CgiXvAACiulyC+DGvocnlD+HYr58jutcVKNm1FgfnT8aRxW9DSp/fZ49870bN7rhSdQhPuOKKK4pOnjxpmDZtWhygT5a95557EkeNGnWoRYsW5cePH6+3Lhg2bNixBQsWNN63b58JAPbv32/ctm2bpXq7wsJCw5EjR4zXXntt4YwZM/Zs2bIl4lzzs2jxB/qqt0qW6fckIWB61PRR30mmWUtVZyFqCENYFMISu6Jk11qUH9gNa3xnAEBkcn+U7vvrVK2KosMoy9+OiI4XoHDVR2hy5aP6pNycLBXRyfdmaHZHY9UhzpXBYMDnn3++47PPPmvcpk2bLm3btu1itVpdr7322r6//e1vRdu2bQtPSkpKefvtt2t9rb169Tr5+OOP7xs8eHCnTp06pVxyySWd9uzZY67e7ujRo8Zhw4Z17NSpU0r//v07P/vss3vONT+vHvIPUwG0VB3CE4SAuM30zcBm4ujSseX38UaL5HecxYUQBiMMYVFwlZfi5B/r0ej8/4OrtBjlR/bBHNsKJbvXwxyX+Jf9jv40BzH9bwIAyPKyU/NgZIXXrogl/9IC+tpZN3vqgA25RNkbOnToUL5kyZId1beHh4c7N23aVLVaL6j6/Pbt23+v/Pcdd9xRUDlZt6ri4uJ1lf9u06ZN+caNGz16oQaLFtUybJcBuFV1DE8bblw9MA6FS68vf3yA/u5O5B+cx4/gkGMqIF2AdCEiqT8iOvQBho3FwfmTASFgCItC3N8nnNqnbP9OAICleXsAQFS3ocj7z1gYGzVBTL8blLwOUuImze74JCcz/SvVQUKVkLx
5rzoZtmgAmwC0Vh3FW7JdicvTyyZf4ISRBTIRBYNcAKk5melHz3THrKysnLS0tENeyBTQsrKymqSlpWkNacs5LWo9jyAuWAAgybDnoqXW+9daUM4+dCIKBvEAXlEdIlSxaFElwzYAwN2qY/hCgjh0/irr2M2RKClSnYWIyANu0eyOoWexn8vlcnG4vAr396PBl+CxaFEhw2aCfrVQyPzyxomiHj9b790Ti8LDqrMQEXnAK5rdcabD3psOHjxoY+Gic7lc4uDBgzbo0yQahPMM1LgPQKrqEL4WJU6mrLLet/OS0ill+9A0KK6WIqKQlQzgHuhXFDVIRUXF7fn5+e/k5+d3ATsNAL2HZVNFRcXtDd2BE3F9LcPWAsBWAI1UR1GlQhr2/q0ss2K7TNBUZyEiOgcFADrmZKazB9lHWOn53osI4YIFAEzClfCt5dHIHmL7VtVZiIjOQWMAT6sOEUrY0+JLGbb+AJapjuEvpEThmPJH/vjR1b2b6ixERGfJCaB7TmZ6g+dl0NljT4uvZNiMCKKbbnmCELDNNL/QcYRh+RrVWYiIzpIRwKuqQ4QKFi2+cw8A9ihUIwTCp5rfTPuHceFK1VmIiM7SJZrdMUJ1iFDA4SFfyLA1AbAdQIzqKP5KSrjecg5fnllxwwDVWYiIzsJO6CvlciFNL2JPi288DhYsdRIChrtNCwa8bH7zR9VZiIjOQnsAE+ptReeEPS3elmFrCyAbgEV1lECxzNl16ejyx3iHaCIKNEUAOuVkpuerDhKs2NPiff8CC5YzMsC4ceBXlok/GeByqs5CRHQGogE8pzpEMGNPizdl2HoCWIMQWq7fk/5wNVs9pGxKz3KYWPQRUaCQAPrkZKbzqkgvYE+Ldz0PFixnrY3hwAWrrGM3ReDkCdVZiIgaSICXQHsNixZvybANBTBEdYxA10Qc67naeu9uG44fVZ2FiKiB+mp2xzWqQwQjDg95Q4ZNAFgLoIfqKMHipDRvH1Q6tVE+YpurzkJE1ACbAHTLyUznh6wHsafFO64DCxaPChPlHX+yji9rK3L/VJ2FiKgBugC4QnWIYMOixdMybAYAT6iOEYzMwpm4yPKwtZvYuV11FiKiBvin6gDBhkWL540EkKw6RLAyCtn8c8sTTS8ybNyoOgsRUT3O0+yOoapDBBMWLZ6kz2V5XHWMYGcQiJltfq7dcMOqtaqzEBHVg70tHsSixbOuAG+K6BNCIPJ18+tdRxu/XaU6CxFRHQZodsdFqkMECxYtnsW5LD4kBCxPmd7rc7/p059UZyEiqgN7WzyElzx7SobtbwAWqo4Rqj6qGPSjveLOQapzEBHVondOZjqHtM8Re1o8h70sCl1n+nHQf8wv/qg6BxFRLSaqDhAM2NPiCRm2SwAsVh2DgHWu9suuLnvqIgkDC3Ii8icSQJeczPTNqoMEMr6xe8ZDqgOQrodh54DFlodXm1BRrjoLEVEVAsBjqkMEOva0nKsMW2cAW8AbI/qV/TJmzaDSqSklsEaozkJE5OYE0CknM32X6iCBij0t5248WLD4nebiaO/V1nt3NMLxQtVZiIjcjADsqkMEMva0nIsMW2MAewBEqo5CNSuWlq2DSqfGHkDjpqqzEBEBKAPQPiczfa/qIIGIPS3n5nawYPFrEaKs83LrfcVtRD7fIIjIH1gA3Ks6RKBiT8vZyrAZAewC0Fp1FKqfUxryLi97tniz1NqrzkJEIS8fQGJOZnqF6iCBhj0tZ+9qsGAJGEbharnA8s/G54vNvNyQiFRrAWC46hCBiEXL2ZugOgCdGYOQsR9Znm19meGXdaqzEFHIu0N1gEDE4aGzkWFLA7BedQw6O1KidGLFP9Z/6Bx8vuosRBSynAA0Tsg9M+xpOTu3qw5AZ08IWCeb/tN7rHH+ctVZiChkGQGMUR0i0LCn5Uxl2MIA5AJorDoKnRspIWc5hy57suLWgaqzEFFI+gNAu5zMdJfqIIGCPS1n7mqwYAkKQkDcYvpu4Az
z1B9VZyGikNQGwBDVIQIJi5Yz9w/VAcizhhl/HfSJ5amlALsdicjnON3gDHB46Exk2NoB2AEu2x+UtrlarfhbWeb5ThhNqrMQUcgoA5CQk5l+UHWQQMCeljNzG1iwBK1Ohn39frKO/82KspOqsxBRyLAAGK06RKBg0dJQ+gq4t6qOQd4VL470WW0duzUKxcdUZyGikMEhogZi0dJwQwG0Uh2CvK+xOJ72s/XefXEoPKQ6CxGFhCTN7rhIdYhAwKKl4W5QHYB8J1KUJq+0jitKEAdzVWchopDA3pYG4ETchtDXZjkAIFp1FPKtCmnITS+bXLpVtm6rOgsRBbViAPE5memFqoP4M/a0NMzfwIIlJJmEK/5ry2PRvcXWLaqzEFFQiwAwUnUIf8eipWGuVR2A1DEI2eRTy1OtLjH8lqU6CxEFtRGqA/g7Dg/VJ8MWAX1oKFJ1FFJLSpQ8XHHXxrnOgX1UZyGioFQCoElOZnqx6iD+ij0t9bscLFgIgBAIf9H0Vs+7jF+tUJ2FiIJSOIBLVYfwZyxa6sehITpFCJjspg/7Pm6as0x1FiIKSleqDuDPODxUlwxbNPShoTDVUcj/fOm88Mf7yscNUp2DiILKQQAteOfnmrGnpW7pYMFCtbjCuGrQB+ZneaNFIvKkpgD6qg7hr1i01G246gDk3/oaNw9caHlshQEup+osRBQ0OERUCxYttdHvNTRMdQzyfymGPy9aapmwxoLyUtVZiCgosGipBYuW2l0AIE51CAoMiYZD56+yjt0ciZLjqrMQUcDrqNkdyapD+CMWLbXj0BCdkThR1ONn671/NsaxI6qzEFHAY29LDVi01C5ddQAKPFHiZMoq67iCeBzKU52FiAIai5Ya8JLnmmTYWgP4Q3UMClzl0rh3WFmmc6ds1UZ1FiIKSBL6DRTzVQfxJ+xpqRl7WeicmIUz4TvLIxHdxY6tqrMQUUAS0FdkpypYtNSMRQudM6OQTedbJrUYYMjaoDoLEQUkDhFVw+Gh6jJsZgAF4P2GyEOkRPGE8ns3f+Hq11t1FiIKKCcBxPEGiv/DnpbTnQcWLORBQiDiFfMbabcZv16pOgsRBZQw6MtvkBuLltMNUh2Ago8QMD9hmn3Bo6YPeaNFIjoTXNK/ChYtpxukOgAFJyFg+H+mrwZMMc/4UXUWIgoY/VQH8Cec01IV57OQjyx1dvvxlnL7INU5iMjvHQUQm5OZzg9rsKelut5gwUI+MNC4YdCXln/+JODi7eeJqC4xAFJVh/AXLFr+apDqABQ6uhl29//B8uDPZlSUqc5CRH6N81rcWLT81SDVASi0aIb9F66wjtsYgZMnVGchIr/FeS1uLFoqZdhM4C8GKdBMFPZabR27y4bjR1VnISK/xJ4WNxYt/9MVnM9CijQSxV1XW8cebI4jB1RnISK/00GzO5qpDuEPWLT8z3mqA1BoCxdlHZdbx5doIm+P6ixE5HfY2wIWLVVxiXVSziycbRZbHjJ3Fbu2q85CRH6FRQtYtFTFnhbyC0YhW3xhebxJX8Om31VnISK/wTmX4OJyugxbGIAiACbVUYgqSYkT95aP37rQdX5P1VmISLlSALaczPRS1UFUYk+LrjtYsJCfEQKRb5hf7XKT8fvVqrMQkXJWAL1Uh1CNRYuO81nILwkByzOmmedNMM39SXUWIlIu5Oe1sGjRcT4L+S0hYJxg+qz/ZNM7S1VnISKlLlQdQDUWLbqQ73Ij/3eDacnAt81TflSdg4iUSVYdQDVOxM2wWQCcAOe0UID4zdVh2ciyjIskDPyjgyi0lAGIyMlMd6oOogrf9ICOYMFCAaSnYceARZaHV5tQUa46CxH5lAWApjqESixa2N1GAai9Ia/vcuv49WEoLVGdhYh8qrPqACqxaAFSVAcgOhstRMF5q61jt0fjRKHqLETkM51UB1CJRQt7WiiAxYgT3X62js1vgqMHVWchIp9gT0uIY9FCAS1ClHZeab3vRGuxf6/qLETkdSx
aQlaGzYAQ/wWg4GARFdoSy4PGZPHHTtVZiMirODwUwtoCCFMdgsgTTMLV0mGZGNNHbNmsOgsReU0rze6IUh1ClVAvWjg0REHFIGTcx5ZnEi81rFmvOgsReU3I9raEetHSXnUAIk8TAtH/Nr+cdI3xh19UZyEir2DREqJaqw5A5A1CIOx509s97zF+sUJ1FiLyuJCdixnqRUsb1QGIvEUImB42fdz3SdN7vNEiUXBh0RKi2NNCQU0IiDGmbwe+aX6FhQtR8ODwUIhi0UIh4e/GXwZ+ZHlmKRDqd0glCgohW7SE7l2eM2xhAIoBCNVRiHxlqythxd/LnjvfCSNvEkoU2FrmZKbnqw7ha6Hc05IIFiwUYjob9vZbZp3wmxVlJ1VnIaJz0kx1ABV8VrQIIZxCiPVCiCwhxG9CiL7u7ZoQYpP734OEEIVCiHVCiC1CiCe9GImTcCkktRKH+6yyjsuOREmR6ixEdNbiVAdQwZc9LSVSyu5SyjQAjwF4rpZ2P0kpewDoDeAmIUQvL+XhfBYKWbGiqPsv1nv3xqLwsOosRHRWmqgOoIKq4aFGAArqaiClPAFgLby3AFy8l45LFBAixcnkVdZxha1wME91FiI6Y+xp8bJw9/BQNoB3ADxTV2MhRByACwD87qU8Tb10XKKAYRUV7ZZa73d1Ent2q85CRGeERYuXVQ4PJQEYBmCWEKKmibD9hRDrAHwHIFNKyaKFyItMwtXqG4s9qqfYlq06CxE1GIsWX5FSroI+HldT4fCTlLKHlLKXlHKGF2OwaCFyMwjZdJ4lo+XFhnVZqrMQUYNwTouvCCGSABgBqJwEGJI/cKLaCAHbu+YXO11l+OlX1VmIqF4h2dPiywWmwoUQ693/FgBukVI6hRAmAKU+zFEpJH/gRHURAuEvm6d3b1JxbOXbzvS+qvMQUa1C8jPMZz0tUkqje05LdyllmpTS4X4qFcBOd5sfpZTDfRQp1kfnCRp7Cl24+L0TSH7jOFLfPI5XV+u15qe/lyP1zeMwPHUMa3Kdp9qv+LMC3aYfx3lvH8eOIy4AwNGTEpfNOYGQXYk5AAgB80TT+xc8Znp/meosRFSrkCxalC7lLYR4GsCVAG716YkzbGYAkT49ZxAwGYCXhoahZ0sjikolev37BC5tb0KXZgZ8dk047lrw10VWX1pVhnnXhCPnqMT0X8vw0mVheGZpKSZeZEXNc7DJXwgBw10mx4DmomDphPKxA1XnIaLThOQUB6XL+EspJ7l7Xdb5+NTsZTkLLaMN6NnSCACItgokNzVg3zGJ5KZGdG5iPK292QiUVADF5RJmI7DziAv7ilwYqPG2N4FihHHlwDnmybzRIpH/sWl2x+lvvEEuVO891Eh1gECXc9SFdXlOnJ9Q+/8zj11kxZ1fncQrP5dhbB8L/rnkJJ652OrDlOQJFxk3DXRYJq4wwOWsvzUR+YhACP4BHqpFS7jqAIHseJnEyE+K8cqwMDSy1j7M072FEatvj8QPt0RiV4EL8dEGSADXzi3GTZ+VYP9xl+9C0zlJNfxx0Y+W+3+1oFzFpHkiqlnIzWth0UJnpNypFyw3djXj6mRzg/aRUuLZZaV4YoAVTy0txVODrLipmxmv/Vzm5bTkSa0NBy9YaR33eyRKjqvOQkQAQnBeS6gWLWGqAwQiKSX+8eVJJDcx4oELGz7M815WOdI7mtA4XKC4HDAI/VFc7sWw5BVNxLGeq61j/4hBUZ33DiMinwi5npZQnRHJnpazsGKPE7M3lKNrMwO6z9D/2J482IrSCmDc1ydxsFgi/YNidG9hwLc36RdnFZdLvJdVju9uigAAPHCBBSM/KYHFCHw4kj+GQBQtSlJXW8fuuLj05dI8xLVQnYcohIXcH+AiJNfLyLCNADBfdQyiQFYujXsuK3vetUvGt1GdhShE3ZSTmf6+6hC+FKrDQ/wTn+gcmYUz8XvLw+FpYsc21VmIQhQveQ4RIdelRuQNRiGbzbdManaRYeNG1Vm
IQhCLlhDBnhYiDzEIxMw2P9f+csPKNaqzEIWYkJuXGqpFi0V1AKJgIgQiDJD3bQAAIABJREFUXjNP6zbG+PUq1VmIQgh7WkJECM4+JvIuIWCZZJp9/kOmj39SnYUoRLBoCRFcjpzIC4SAYazpi/4vmN5aqjoLUQgIueGhkHvBblw/nshLCgyGI6WHjx+yXtX450JzxPmq8xBR8AjVnhYWLURe8HlU5C/PHWm5oeRId+s9+xbmQkreq4jIe0Ju1CBUi5aQ+0ETeVOREMdGxbdY/l55rO1va7uGF5tKIpvsaRzdQ65doTobURALuc+yUC1a2NNC5CGLIsLX9W+TUHSg1NzxmVnO6O0db7DAeahZk9KWkbccf89gkM69qjMSBSkWLSEi5H7QRJ5WIkTxLS2bLbu/WZPuxgo0nvq283BB49SDZSZzO6C8TbwrNn5/TlLsLfgPixYi7wi5z7JQLVrY00J0DlaGhW3s1ybhwG9hYQMA4KW3nRvCy5CyJelml6t893YBA2JkROuCI620S1zfN7XJgrWqMxMFIRYtIaJCdQCiQFQGlN3bvOmPd7VomlIuhAYAj33iWta8EBccjk3ZUG6J7uEs33ZCCOESEMIK0+79+e33PoanYyFlmdr0REEn5P4AD9Wi5YTqAESBJstq2dq3TcIfyyLCB0EIIwBcu9T5U49dciAAbE66uQIAXBX7YgSEBIDGMuroH39075Ig/2yVhnVcLZfIs0pUB/C1UC1ailQHIAoUTsD5aNO4H29q2bxdqcHQsXL7+dmu365eKS8AgMONkzeUWxr1lNJZDnmyEyCcABDvjDWWl4fFlZRErx2Hl3oZpDNP1esgCkIFqgP4GosWIqrVVrN5V782CdkLoyIHQQhz5fbWB+SuB+a72gvADABbkkeXA4CrYu82AOEGYXABQCtXbDMA+COnhzUcJ6Oux6wc378KoqB1VHUAXwvVouWY6gBE/kwC8tm4xkv/r1WL+BMGQ2rV5xqdkIczZzpNArABwOHGyRvLLI16AYCrbNshvZU+PNRURreFRNmhQ617uFxiz9+x4MIoeWy9b18NUdBiT0uIYE8LUS3+NJn29m/dKuvjRtEDIURY1edMFbL0lbec+0wutK7ctiXp5lMTbJ0Vf4QDgIBwAYABBrMJhl2AEIcOajsBwI6noyAlJ8MTnTv2tIQIFi1ENXi1se2n9ISWMYVGY/eann/hXeeaqFJ0q/z6SEznTWVWW69TDVzH2gNA5URcALDJyIMAsHt3jyQp4WyL3R1SsZEr5RKdGwkWLSHjOPQfOBEByDca8y9JjF/zToytP4SIqqnN/fOdSxMOo1/VbZuTbzl1byGX8+BuAHEAIIQ4tX5ES1eMCwDKyiJblJZGrAWACXixh5CuA154KUShoij/4u5cpyUkZBRK8LJnIgDATFv0iqGJ8WEHTabetbW5YrVrxYXZ+qXNlY7EdPq9ai+Ls2z7vsp/CxhO/VHQyhXXuPLff/6RBgCIQHGjUfhwu4deAlEoCrleFiBUixZdoeoA/5+9Ow+Mq6r3AP49d2ay7/sySdp0TegybVlKU9pGVlkEQRABCUUQEPWBoNKnD8KmiIBVH0LLUkBBQQRF0QeIdKGlpQul0yVNmr1Jp1mafZnlnt/7Y5qSppPMdmfOTOZ8/mpm7j33F2gnv/zOOb8jSSIdU5TOi425nzyVllpGjKWMd52plu+5/iN+SkKzv6TipB4R3F574s+MsRNNr3J4cjHI2QTr6NHiRUTsCAB8BW8tiaP+PRp8K5IUiSJuES4Q2UlLh+gAJEmUNxPit60ozOfNBsPZE12X10mN973B8xkQPfr1rpQZ+2zRKSclMqR2Foz8efSaFgP0CQpYo/MrRdfZaax2XgP2YzwSDaKIK3FLkgZkpSXCWEQHIEnB1quwnivzczY/mJl+FjGWOdG18UPU88sXVK4cX6cy2v6SmwZHf018oB3gRSNfj+weGpFIsSeaytXXLywmcq4pm46
aWTNRJRflSpL3ZKUlwsikRYoo78fF7lpWaByoiYoqc3etTiX76rVqnUHF1LHvdSXP2G+NTjlj9Guq/VDd6K/Z8eZyI7J48olt0cNDSQV2e8yuka/vwWPzGHFZ+ZQk78ikJcLIpEWKCIOMDXwzN3vjPVkZC1TG8jy559FX1K3Jg1jg6r39JRWnLGLntprh0V+Pnh4CACNPSxz9dXPTnBNJTAL6U67Amwc8iUuSpBPk9FCEOSo6AEkKtI9jY/aUFRk7dsdELwNjzJN77nhX3VBswTmu3utKnr7fGpN6xtjXucNy0lTT6N1DAJDLUwtHf33kyMxFROgc+foqvL40lgb3eRKfJEkAZKUl4shKizRp2QDr7dmZG+7IzpzjYKzI/R1OF+zkW1fsoWXjve+qykJkHwRsM0e/Nnr3EADEITqT0Rf/5oh0Ud3dOXtPXA+we/BzBqKT7pMkaVwR+Yu3TFokaZLZFR11YEmRsXlzXOxyMObxv/HSRtr/rff5fAa4rMh0J087YI1JO7XKYm88CEA/+rWxlRYAiEN08+iv6+tOLxj9dQn2lxbjkFyUK0meqRcdgAgyaZGkScIBOO7NTN9QkZs93aoo0725N7ObWu//o5rBgNjxrtlXcpPL4y9U28FTDiAdW2kBgAyedFKVZmAgtdhujzqpT8sP8ehpIIrIsrckeanO/SWTTyQnLUfcXyJJ4aEqylBbVmSsfi8hfjkYM3hzb6yV+p56Tu1XCFnjXdOdNLXKGpN2pqv3uKP5lLb/YxfiAkA+T4sZ+1pLS8lJiVAS+tIuwd/2jr1OkqSTcABNooMQIXKTlsqebgCn/IYoSeGEA/zB9NQNV+fl5A8qSqm39yuc1KfWqlXRDsyc6Lr9pStddpAm4ipo8JR7XU0P5fO0/LGvtRwuXUB0cnfqa/GHsmgakruJJGl8hy3lJrvoIESI3KTFqdb9JZIUmhr0+qZzCvPNbyYlLgdjp1QxPPHAa+rm9H6csk5ltJ6kqQeHY9LPcvUeqZZDABLHvu5qeiiZ4gowJkHhXB/X15v5+ejXFJByNx53gEgeaipJrkXk1BAgkxaZtEhh6anUlI2XGXPTe3W6+b6OUfGBurGkGePuFBqxr+SmcftBqLZqlzsY2DgfLdEwnLJ4sK5uUfbY1+Ziz9xCNMhFuZLkWkQuwgVk0iKTFimsHNHpjpQX5O9Yl5K0DIzF+zrOMjPffvEOctsZtydxysHhmHSXa1kAgNvrXa6fYezUNS0AkMYTTplm6uvLnKWq+lOmg36MR2aBSB5sKkmnkklLhJJJixQ2nk9O2nxBQV5ch153yonL3pjWStV3/oPPZoDO3bX7Sld2TdSUjnj3FFevu1qICwD5PNXlM1tbZrWPfS0F3ZkX4F+fu7pekiKcnB6KUDJpkUJeh07puMiYt/XXaSllYCzZn7HSeunoI6+oCczFOpSxehOLqsdbywIAXO1uASjX1XsMissmcfk8/ZSpIABobp67gAinNK77JtaVRZG1xl2skhRhZKUlQkVstiqFhzcSE7Z+qSAfLQb9Yn/HirLT4K+eUzt1BI/OH9pXuvLYRFUWbq9pGO89hZ26ewgA0ilhKgjDY19XVUPiwEDqZ6eMA677Hp4cHPu6JEU4mbREqGYAEbltTAptPQrr+Wp+zuaHM9IWE2MZfg9IRE8+p+6JtcGjbdG9iYU1QzEZ41ZZAEC1HRq35f5400MKFL0BOpe/LNTXL0xx9fpC7JyfR81yUa4kOQ1Zyk0R22csspOWyh4VEZyxSqHpn/FxO5YVGgcPRUW5XSjrqVVv8I3ZPfC4WrOv9OYOdwcsktqeM9574y3EBYBkiut09Xp3V94czhWXU7ar8NB0ELnsyCtJEaZBdAAiRXbS4iSbWEkhYZCxgetyszf9OCvjdM6Yy7Uivvj6BnXTgjpa7un1vQkFNUMxGRMmOMSHewDHuEcFuGouNyKXp477nsUy/bCr19NwLLsc/941UUySFCEielmDTFo
As+gAJGljbMznZUXGTnNM9DlajntWFd915Rbyaj2MJ1UWbq+rxjgHKwKY8JxGI09PG++9xob5c4lgdfXeTXhuiYFscvG8FOki+hdtmbQAe9xfIkmBYWUYvjUnc8Od2ZlzHYwVajl2YRvV/eBtPo0BHp9F1JdQcGgoNtNtkqPaqydcHDvemhYAyOLJxSC4XA/jcMSkDQ0l7XT1nh6q4Tv4tezbIkW6iK44yqRFJi2SIDujow8sKSxo2Robu3zC0oQPkgao87F1qp4BXm2R3lu6ss1dlQUAuKM1daL3J5oeMkAXp4My7lqyhvoF4540fSa2LsymI1vdxSdJk5hMWiJcDYAh0UFIkcMO2H+QlbHhptysGTaFTdN6fL2DrKvXqC16Dq8qN30Jxtqh2Cy3VRYi1QYanvCAxYkW4gJAIsW4bP8PAJ2dBSbOlcbx3l+FBwtBdEpPF0mKAH0AqkUHIZJMWip7OIB9osOQIsP+KMOhsiLjoQ/i45aDMX0gnvH4i+qOBCvmeXvfvtKVRz2p+HDH4YMAJjygcaLpIQDI5im2Ce5m7e1Txq3EZKI9byk27HATpiRNRrst5aaIPkhUJi1OcopICigO8Psz0jZ8PS+nYEhRSgL1nLvfVjcYO+H1Vum++PzawdhsjxbsctvBY+6uUdzkPvk8LWmi9xvqF5QSwTHe+7fimbP1ZG9wF4ckTTIRPTUEyKRlhExapICpN+gblxYa976dmLAcjEUH6jmXbeWbz67yfGvzaPtKb/aoygIAqqNx3DUnIyZa0wIAuTx1ykTv22xxWVZr/Lgf0Ho4or6NpzvcxSFJk4zLReqRRCYtTvJQNklzBNAv01I2fiU/N6NPp3g9XeON+bV8zw0fcZ8OUuyPz6sbjPOsykJEBN7ndh0OG383NAAgFlFpjNA60TWNDfMnHKQMm07PoLZt7mKRpElEVlpEBxAidgGut2BKki9a9LrW8oL8Xa8kJy0DY/GBfFZuJzWteoPnM8CnKs7e0pstnlZZSG2vB5Du7jo2ztlDo8UhxmUjuRFtbVMXErEJ25WvwoP5IDrlLCNJmoQGAVSJDkI0mbQAQGVPL4D9osOQJoc1KUkfX2TMi+/U6xYF+lnxQ9TzxAuqQ/EgkXClPz6vfjAux+Pmc6q9psWT65gHHy1ZPMnNQYiKrrPTeHCiK3JgMZ6FT2S1RYoEn1vKTaroIESTScsXPhEdgBTe2nVK+wXGvG3/m5qyFIx51R/FFzqV7KvXqnUGFcW+jrGvdGWrNz1iuL3Wo2vdbXkGgHye5nZtTH3douk0TiO6EbfjN2fpyNHsSVySFMYifmoIkEnLaDJpkXz2p8SErecW5CtHDPoJT0bW0qOvqFuTB7HA1/v743LrB+JyvWrxT+qxAk+uc7emBQDyeJrR3TXDw4lGuz3ms4muiYI9ZiXWRuypt1LEkEkLZNIymkxaJK/1KEr3V/JztzyakbaYGPNpisYXd7yrbii2wK9zivaV3twKxnSeXk+8vw3gHjWs82RNSxLF5oPQ5e66pqa5E/R0cSrHh2em0jHZu0WazCJ+5xAgk5bRDgJw239Ckkb8Iz5ux7LC/OH6KMOSYD73gp1864o9tMyfMQbichoG4r2rsqi2Qx6fLqt4+NESA0ODu2ssR2aeToR2d9etQmUmiFwetihJYW4YsgkqAJm0fKGyhwDIM00ktwYY6782L3vTqqyM0zljOcF8dmkj7f/W+3w+m+CEZU/sLb25xZsqCwBwe43HCYEn00MAkM4Te91dQ6QYurty3S6Uz0dL0ULskBVTaTL6xFJuGrfZYiSRScvJ5AeeNKH1cbG7lxYZu/ZFR/s1NeOLzG5qvf+PagYD3C5gnchAXHbjQHyeV1UWAOAOS6bHF3uwEBcA8niaR0cZ1NUt8mha6rv41ZkKqRP2f5GkMPRv0QGECpm0nGyL6ACk0GRlGP5WTtaG72VlzHcw5tFiVC3FWqnvqefUfoWQ5e9Y+0pvPuxtlYXINgD
YJzwkcTRPtjwDQD5P86hSNTiYOtVuj3LbBDIa1rhvYt24hy1KUpj6UHQAoUImLSf7BICcE5dOsj0mev+SooLWT2NjloMxv6ZlfKFwUp9aq1ZFO+Bx0jCegdjsxv74fO+rLPbGagAeH/CoeFhpSaOEKSDPTlk/fPi0fk+uuwD/OjuJuuVOC2my6AYgF5kfJ5OW0Sp7hgBsFh2GFBrsgP37WRkbbs7JmmVjzOdeKP564DV1c3o/ztBirH2lK5u9rbIAgGo72OPN9QyKR8mdAqYzQOfRAt/WltkLieBRHKvwUAqI7J5cK0khbr1sKvcFmbScSs4dStgbFVWzpMhY91F83HJffshrpeIDdWNJM/zaKTRiMDarqT/B6HWVBQC44/CEpzKP5elCXABIpfhOj2Lg+tje3kyPzgkrRGPxXOyW073SZCCnhkaRScupZNISwVRA/UlG2vpv5GUXDSvKLJGxLDPz7RfvoDKtxttbenMjGPN4imcEEVdBgzO8ucebWbRcnurxxfV1i7I9vfa/8MQiRqrF40AkKTTJn0mjyKTlVDsh+7VEpFqDvmFpoXH/O4kJK8BYlMhYprVS9Z3/4LMZoEmVZzA2s7k/wXi2L/eSeqQGQKI39zC4by43wqime9yUr68vc5bDoffonLBYDCdciz943FtGkkJQi6XcFPGHJI4mk5axKns4gI9EhyEFDwH0WFrKhivyc7P6dcpc0fGk9lHbI6+oCczLRGEi+3yssgCAaqs+6u09zItSSyYlFYPg8Zx9a+tsj6aTAOBSvLMkgfo8mlKSpBAkp4bGkEmLax+IDkAKjsN6XcvywvzdryYnLQdjcaLjibLT4Oq1aoeOkKfVmIOxGYf7Egp8PhOJ2+u9rjp5uuUZAPTQxeig1Ht6/eHmOSYiDHh6/Y/wcByIZGMuKRzJqaExZNLimvyLEgF+l5L88ZeNeYldOp3Phw5qioiefF7dE2tDqZbD7iu5uR6MGXwOi/dM8fYebxbiAkASxXpczVFVQ2J/f5rHW5qnoXZGCfbJRbmj9DxeibYrv4SOm7920uuDb/0RHTdegY6VV6FvzWoAgG3vbnTecg0677gejpYmAADv70PXj74DIo9nASXfyJ9FY8ikxZXKnloAci58kmrT6drOL8j79JnU5KVgzKtdMYG06g2+MbsbPu3uGc9QTMbhvsRCn8fkatdhgHK9vY952KdlRDZP8aoS0lC/MM2b6+/GL+Yz4m7PL4oUsRdehtTHnj7pNdtn22Hdsh7pz7+BjHV/Qfw1NwIABt/4PZIrf4mEb30PQ+/8GQAw8Pu1iL/+ZhFtiyLJAUu5SZ5ePoZMWsb3jugAJO39ISnxk/MK8vQWvf5M0bGM9vUN6qYFdbRc63H3lq70q8rCbTU+dZf1tE/LCCNP8yp57O7OPU1VdTWeXh+PweSr8Hq1N8+YzKLmL4KSlHzSa4Pv/Blx31gJFuWcDVRSj+eFej3IagVZhwCdHo6WZqgdbYiaf3qww440ssrigkxaxvdX0QFI2ulWlK5LjblbfpGeejYx5tVv6YF2VhXfdeUW0rTCAgBDMektfYlFfo2r2g/51NTK2+mhHJ461dtnWCzTvTpj6Aq8uSSOBszePidSqIcbYTd/hs7vfBPH7voW7FXOQ4Xjr7sZfU89gsG/vIa4r16L/hf/FwkrvyM42ojwf6IDCEUyaRnfxwA6RAch+e9vCfHblxfm2xoNhiWiYxmrsI3qfvA2n8YAn6sh49lXurLWnyoLAJDa7tOCYG+nDWJgSGHEDntzT1Pj/PlEGPY4JoD9EI8aQMS9Ci5CkKqC9/Ui7elXkHjb3eh+6EcgIhimz0La068g7annoLYehi49EyCg+6Efo+dnP4F6zOPNXJLnuiErLS7JpGU8lT0qgH+IDkPyXT9jfdfk5Xz808z0MzhjHjclC5akAep8bJ2qZ0Cy+6u9MxST1tqbOMWvKgvxoW5Anebb3d4vdoinaK+SFocjOmVoMHmnN/f
MxMHZM1D9sXeRRQZdZjZizjkXjDEYSuaAMQXU03XifSLCwB+eR/w3v42BV9Yg4abbEXPexRh6+48Co560/mYpN9lEBxGKZNIysbdFByD55sO42M+WFhl7DkRHLRUdiyt6B1lXr1Fb9ByFgRh/X8nKWn8b5Kn2uhrAy3me47xpLjcii5I9rpqMaGgwxXt7z7342TxGXJYHxoguWwHbZ58CABzNjSCHHSw59cT7w+/9HdGLz4GSmAQaHgaYAjDF+WdJa2+IDiBUyaRlYu8DnveDkMQbZmxoZU7WhruyMkwqY0bR8Yzn8RfVHQlWzAvE2EMxaa29SVN97ssygturff67701zuRH5PM3rBKSzs2A+54pXi4UT0J/yFbzlUVfdyar74ftw7LsVUJsb0X7NhRj659uI/fIVUI+0oOPmr6Hn4fuQ/OOHTkzz0fAQht7/O2IvvxoAEHf1DeipvBf9z/8WsV+5WuS3Mhl1QfYKGxeT++zdqEx+C8BXRYchubc1Jnrvd3Ky4u2Meb2oM5juflvdcHaV9juFRuxc8IONPcnT/D5kcbj76T0gq0+J1bm5N2zKiMk/x5t7+jB05PWYLV5vr54xc8v6nJzaFd7cw8H4rXilapjFadoTR5I08JKl3LRSdBChSlZa3JO7iEKcDbB9Nztj/a05WSWhnrBctpVvDmTCMhSddqQnqdjvKguRwwqy+nxgpLe7hwAgEbG5IHg9bdPQYDqNCF71eVFAyj14jGR3NCkEyamhCcikxb2/A5ALokLUnuio6rIiY8OGuLgVYEyTwwUDZX4t33PDRzygzS32l95UA8ai/R2HOw5XA/B5HF+bjsUiqsHbe+y2uEzrcMIOb+8rxb7TpqJus7f3SVIAdUHuGpqQTFrcqezpAvBP0WFIJ1MB9b7M9A3X52ZPGVaUmaLjcSe3k5pWvcHzmR+JgDvD0amaVFkAgNsO+rVQ1dvmciPSeWKfL/c1NM736TDIH+GREhB1+3KvJAXAXy3lJrvoIEKZTFo88wfRAUhfqDYY6suKjFXvJsQv93eHTDDED1HPEy+oDgVID+Rz9pVoU2UBANXR5NfhkcyHLc8AkM/TfPr/2d42dSER86rZHAAkoTf9y/j7Hl+eKUkBIKeG3JBJi2f+AWezH0kgAuhn6akbrsrPyRlQlNNEx+MJnUr21WvVOoOK4kA+Zzg6xdKTPE2TKgsREXjfdL8G8XF6KJ+neb0Q9/gDlY6OAp/a9F+H35dF0/BB354rSZo5BuBD0UGEOpm0eKKyxwrgTdFhRLImvf7wssL8z/+YlLgcjMWKjsdTj7yibk0eRMBPkd5fclO1VlUWUtvrAPh11IEvC3EBIJXii0C+tRloqF84gwhed7tVwHX/hV9a5aJcSTA5NeQBmbR4Tk4RCfLblORNlxhzk7t1OpPoWLxx+7vq+mkWeLXt1xfDUSlHu5Ona3YApGqv9nqaZSxfp4cYmBIFvU8nrA8PJ+bbbLG7fLl3PnbPM6Jpiy/3SpJG5NSQB2TS4rmNAJpEBxFJjup0R88tyNu+NjX5HDCWKDoeb1ywk28t3xO4rc2j7S+pqAJjMVqNx+11vpVJRvGludyIVIrvcn+Va81Nc73a+jzafXh4Boh6fb1fkvzQATk15BGZtHiqsocAvCY6jEjxclLilvML8qLa9PozRMfirdJG2v+t9/l85mMLfG8MRyW3dafM0GQtywhSj2lwtIDvSUsuT/X5c8limbGICO2+3JuKrqzz8N5nvj5bkvzwkqXc5HPCHUlk0uKd34sOYLI7piidlxhzP3kiPXUJMZbq/o7QktlNrff/Uc1gQFDW3ewvqTigZZWFeN9RgPudtPiTrRnVdJ93WREphq6uvH2+3n8jXigzkLXG1/slyQcEYI3oIMKFTFq8UdmzH8BW0WFMVm8nxH+6ojCfNxkMZ4uOxRexVup76jm1XyFkBeN51qjktu6UmZqtZQEA1XaoXotxfF3TAgCZlFQMLzvcjlZft2i
Kr/fqwPXfw6/keWNSMH1oKTcdEh1EuJBJi/eeER3AZNPHWO/X8nI+vj8z/UxiLFN0PL5QOKlPrVWroh0IWqO7/SU3HtB6JxW311g1GciPNS06KNE6KD4txgWAwcGUKXZ79Oe+3r8I20251CIX5UrBIn+meEEmLd57A8799JIGPoiL3XVOkbHvYHTUUtGx+OOB19TN6f0I2voba1RSe1fKLE2rLADAHUc1qRL5U2kBgGSKa/Pn/sPNpf3+3H8fHpoKIr/GkCQPtAJ4R3QQ4UQmLd6q7BkG8LLoMMLdEGODN+ZmbfxBVsYClbF80fH4o+IDdWNJM/w+Vdkb+2ffuF/rKguRrR+wa1Qp8i9pyeEpqj/3t7TOXkTke0PIDHTkLsd/vD7PSJK89LxcgOsdmbT45lk4F09JPtgSE2MuKzK2fRYTs8yfaYQRxAmH7j+Exl81AgCan21GzU9rYHnTcuKatr+1oXeX9rtZl5n59ot3UJnmA0/Aakhs70qdrXlVh9sbagBocuikr83lRhh5eoo/9xPXx/T2ZPnVnv9mrCnTk12TNT6S5IIK4DnRQYQbmbT4orKnGsBHosMINzbAekd25obbcjJPszM2RatxO9/vRHSesxnscPMwAGDGIzMwWD0IdVCFvduOobohJC1M0uqRAIBprVR95z/4bKbRD3pPHSi5cT8Y8+tsIFdUW7VmWR1jzK/PlhyeMhXk3y8GdXWLfDwSwEkP1XAHfiOngqVAeddSbjosOohwI5MW3z0rOoBwsjs6qmpJkbHp47jY5fDzB9po9mN29H3eh9Rlx3dH6wCyE4gTyEGAArS91YasK7Xd0JPaR22PvKImMCCoTe9shsSOY6klAVk7wx2Htfxe/Cq1REGfpID51cyxvz9jhsNh8Hn7MwAsxpZFWWSROwalQJALcH0gkxbf/RWAxe1VEc4BOH6Ymb7hm7nZ06yKMkPr8Y+8dgQ5X8858SMyJi8GhjQDah+oRfIZybAUqO2VAAAgAElEQVQdtQEAYou0W/4RZafB1WvVDh0hT7NBPbS/5Ma9gaiyEHEVNKjZzid/F+ICQALF+H2cQGvLbJ+7645YhQcLQDTo7ziSNEo9gPdEBxGOZNLiq8oeO2RDoAlVRRlqlxYZq/8vIX45GDNoPX7v7l7ok/SInXJyQpJ7fS6mPzwdGV/OcFZZvpqFtnfa0PR0E46t97PaT0RPPq/uibWh1L+BvBfIKgs5WmsAJGg3ov9JSyYl2/wd4/Dh00xE8GsXUBba8pdg03Z/Y5GkUdZYyk1yXaQPZNLin6cBDIkOItRwgD+cnrrh6ryc/AFFCdgP98GaQfR+1ouD9xzE4WcOo/9AP5rXNJ94v3dXL2KnxoJbOawtVhTeWYjuLd3gVq8PAj5h1Rt8Y3Y3FmsRv7f2z/7mPjAWH4ixVXu1X1uMx2LwfwrQqKb5XVFSVUNCf3+63635v42nF+vI0ejvOJIEwAbgRdFBhCuZtPijsqcdcvvzSRr1+uZlhfnmN5ISl2vZXt6VnKtzMPtXszHryVkw3mFEQkkCCm4rAACQg9D5QScyvpwBbuNfrLAg53u+uGajumlBXXAOQRzLZkjoPJZWenqgxuf2Bq0rYX5XWvJ4qgZnIAH1dQvT/B3DAEf0LXhG08ROilhvWspNPp2PJcmkRQtPAvD9V/dJZHVq8qZLjbmpPTrdfNGxdH7YiZSyFCjRCmIKYgACan5ag7gZcdDFe7/Z58yD/LOrNpOQCgsAHJh9gzlQVRYAIN4zVdMBNdjKHo+YbPh4+OFoPT05p6mqrtrfcZZh/Rlp1CGniSR/EIDHRAcRzvSiAwh7lT2HUJn8VwBXig5FFItOZ7kuL/twu15/jqgYEkoSkFDyxZKMjAszTvyZMYaCOwp8HruwjerueYsXM0DzdTmesBnij3WmzQlclUU91gyQ7/+BXNDqhOs4RDUOwub30Q6WIzMs+cYqvxca/zcezL6XfjMc6CqiNGm9Yyk3mUUHEc5
kpUUbj4sOQJQXkpO2XFCQF9Ou1wfsh6pISQPU+dg6Vc+AZFExHJj1zT1gTMNFsifjthq/tha75n+lBQAyeJImrfSbmubNI8Kwv+PkorXwDGzbpkVMUkR6RHQA4U4mLVqo7NkGYJPoMIKpU1E6vmzM3bo6LWUJMeZX99JQpXeQdfUatUXPocnaCl/YDPHHOtPnLArkM1T7Ic2nN5lGny35PC1ai3EcjuiUwcHknVqMdQd+faZCDtkUTPLWe5Zykzwawk8yadHOL0UHECx/TozfVl6YT4cNBmFrPILh8RfVHQlWzBMZQ9WsG/aAsYA2sCO1IwD9ZrSptOTxNM1ia6hfoEm1Khq22JvwfIsWY0kR5WHRAUwGMmnRzj8A7BcdRCD1KqznyvyczQ9lpJ9FjPm9ziCU3f22usHYiaCeKTSWXR/f1ZE+N6BVFuJDXYBaHIChNUlaUiiuEARNjhc4dqxgPueKJmcJnYsPzkqhLvlbs+Sp9ZZy02bRQUwGMmnRSmUPYRJn0v8XH7fznELjYE1UlNAf5MFw2Va++ewqMVubRzsw6/qAV1lUe+0haJRgjKZFn5bj47Ao6Bu0GAsA2o4Wa9Zr5T48mA4ivxvgSRFBrmXRiExatPU6gL2ig9DSIGMDN+Rmb/xhVsYizphfB9CFg/m1fM8NH3Hhi4rt+rjujox5CwL9HG6rDkx7eqbdZ0saJfjdin9EQ4PpNCLYtRirAM1TTdi5RYuxpEntE0u56UPRQUwWMmnRkrPaUik6DK1sio3ZU1Zk7Pg8JnqZ6FiCIbeTmla9wfMZoMniT39Uzbp+NxjT9lhqF7h6JDVAQ2tWvclTUzU7Rdtuj80cHk7QZEEuAHwPT52hkHpEq/GkSUlWWTQkkxbtvQVgt+gg/GEDrLdlZ274TnbmHAdjRaLjCYb4Iep54gXVoQDpomOx6+O62zPmB7zKQuSwgqyzAjG2VtNDAJDP0zVdP9XYYNKs304MrPHX4+UGrcaTJp1dlnLTP0UHMZnIpEVrzmrLA6LD8NXO6OgDZxcVHN4SF7scTLsfPKFMp5J99Vq1zqAiEAtSvVY167rdYCzgfWG4o7kagasqaVZpyaTEYhA0WzvS3j5lAedMsy3LF+HdsxOpx+/zjaRJSVZZNBYRP5SCrrLnHQBhtbPAATh+kJWx/qbcrBk2hU0THU8wPfKKujV5EAGvbHjCro/tac8wBSUWbjvo55HXE9Eu4VWgGPRQ6rQaD2BKZ0fhIe3GA+7Dw0kgcmg5phT29gL4q+ggJhuZtATO/aID8NSBKENtWZGx5oP4uBVgLKKOdrj9XXXDNAuEHT8w1sGZwamyAIBqb4oN4PCafrYkU7ymB8zV1y+cRaTdmWFTUD/tNOyRW1ql0R60lJt8O51VGpdMWgKlsudfAD4RHcZEOMAfyEjbcE1ejnFQUUpExxNs5+3iW8v3UMgsMrbrYnvaMheYgvEsIiJQ/4wAPkLTbdS5PEXTrr1Wa0KuzRar2YJcALgbv1zIiMuToCXAuWPoTdFBTEYyaQmsH4oOYDz1Bn3jOYX5e99KTFgOxoTvlgm2kibaf+t7fJ5WB/tp4eCsbwStykJqWx2AQO0c0qyN/4h8nq55rE2N8zRNhGIxlHgNXqvRckwpbN0jOoDJSiYtgVTZsxnAn0WHMdaTqSmbvpKfm9Gr0wltUS9KZje1PvCams6AOE/vUYlwZUM97jjcDAD4YWsrrqivx6/av5i1eKajAx/29fkUk7PKsjAoVRYAUG3VrQF+hKbJYA5PLoaG0zkAYLFMX0QETSsjX8HbZfHU97mWY0ph501LuSmkq+zhTCYtgfdjAFbRQQBAq153pLwgf+dLKUnngLF40fGIEGulvqeeU/sVQrY39/2+qwvTopwFqYPDzsOC/zp1KnYODaJPVdHucMA8PIRzE31rYHtw5rWfBavKAgDcXhfgf/tMs94qAGCAPkEB0/g0akV/7Fi+5kdv/BiPxIJI1XpcKSzYANwnOoj
JTCYtgVbZUw/g16LDeC45afOFxry4Dr0uoGfZhDKFk/rUWrUq2oGZ3txnsduxYaAfVyU7cwo9Y7ASByeCnQgKY/htRzu+m+FbOxGHLqa3LWtR0KosAEC8K9AnV2v+2ZJAMZo3cauvWzSVCJoulpyGQzNn4YBclBuZfmcpN9WKDmIyk0lLcDwKQNPdD57q0CntFxnztv4mLaUsmL/Jh6IHXlM3p/fjDG/ve6ytDfdmZkE5PuExLToauQYDrmpswEWJiWiy2UAASmNifIrr4Mxrd4GxFJ9u9gHxPgvACwL8GM0/W7IpRfOK5dBQcpHdHq35dM49eGw+Iy7k37wkzDFM4vPnQoVMWoKhsqcXAhrOvZ6YsPVLBfmsxaBfHOxnh5qKD9SNJc3weqfQ+v5+pOl1OG1MQrIqKxtvT5mKlWnp+G1HO76XkYFnOztwd2sL/tzd7fH4Dl1039Gs0+d7G5c/VFuNJicdu6H5Amejmpag9ZgAcLh5jubnL8VjIPmr+PNBrceVQtr9lnJTAHsfSYBMWoJpLYB9wXhQj8J6rsjP3fxIRtpiYiwjGM8MZcvMfPvFO8in06l3DQ3io/5+nFd7CPe0tmLb4CB+1PrFGtYP+/pwWkwsBjnhkNWKX+Xl453eHgxxz9aMHpxx7S4wFrBdPK6o9hpNDgx0Q9M1LQCQy1MDcqREa+usRUTQ7FDGEVfijbJYGphUB6hK4zIDeFZ0EJFAJi3BUtmjIgjb4N6Nj9uxrNA4WBtl8OmH9GQzrZWq7/wHn818/CH6g8wsfDRtOv49bTqezMvDWXFxeDwvDwBgJ8Ifurtwc1oahjkHO15coOPvuePQRfcdzT4j6Du4yNGm6Vk+49D8syUO0ZmMYNF6XCJddE9P9h6tx2UAuxc/14FI011PUkj6L0u5SS6+DgKZtARTZc97cB6oqLkBxvq/kZu96b6sjNM5Y7mBeEa4Se2jtkdeURMY4NuWHjf+2NWFy5OSEasomBUdDQLh8vp6LIiNRZLOfY5UPePrO4NdZSGy9QN2rxYi+yggny1xiG4OxLh1dYvyAzHubBwomYaajwMxthQy/mIpN30kOohIEVEt20PE9wGcDw1/kG6Ijfn8ruzMNAdjIdOOXrQoOw2uXqt26AilWo15Zlw8zoz7Yqf4jWlpJ/7MGMMTeZ7/3HPoovst2WcGvcrC7fXVABYG4VEBSVoyeNLAgE779a0D/enTHQ7DXr3ePmfsezYbx913HYHdTlBVwrJl8ai4KQ0/+1kb6utsWLw4Dt+6xfl34Q+/78LU4iiUlX3x9+Re/GzuHfRiF5gS1ARVCophAPeKDiKSyEpLsFX2tAD4Hy2GsjIM35qTteG72ZlzHYwFejdI+CCiJ59X98TatEtYtFY945odYCzN/ZXaUm3VvnW/857ma1oAIJ+n+bZFywMtLSUu17UYDAxPPJmLtc8ZsWatEdu3D2HPniEAwHPPG2E2D6O/n6Oz04GqKutJCQsAJKEv9TL81RyouCWhfmEpNzUEanDGmJEx9jfGWA1jrJYx9mvGWBRjzMQYu3jUdZWMsYhInmTSIsb/AtjlzwA7YqL3LyksaNkaG7McTLsTdSeDVW/wjdndCNkdU8erLHNFPJs7DicF4zmMsYAcj5DHUwMyjQMAh5tLFxLhlKSOMYbYWOc/MYeD4HAQiACblYNz59c6HfDSS12ouMl1MeUavLY0moYOBCp2SYi9AH4WqMGP/xt6C8BfiWgGgJkAEuBsoWECcPEEt3v7rID8khEI8oedCM5FubcB3rcltwP2u7Iy1q/MyZppU9g07YMLb9dsVDctqKPlouOYSM30q3eAKenBfi4Rd4CGgrGeJWAn2yZTnBGEnkCMzbkhvq8vfber91SVcNu3D+NrVzVi0aJYzJ8fi6wsPe64vQXLl8ejpcUOEDBjhutjvBSQ8gP8QgV5sEJbCgcqgJst5SZbAJ/xJQDDRLQOAMjZZfluALcAeBzA1xljuxljXz9+fSl
jbD1jrI4x9v2RQRhjNzDGPj1+7ZqRBIUx1s8Ye4gxtg3A2Yyxxxhj+xljexhjTwTw+/KLTFpEqezZAeB33tyyLyqqpqzIWPthfNwKMCbXI41x5kH+2VWbKWQrLACgKlEDR3LOElJlIUdrDYCAH9/AwAK2W4aBsWgYAtZnpr5+kctkUqdjWLPWiD+9XoiqKivq6234zp0ZWLPWiKuvScFL65xVlldf7cJDDx3Fu+/2njLGHJjnFKFBdsqdHJ6ylJu2B/gZpwE46SRyIuoF0ADgEQCvE5GJiF4//vZsABcCOBPAA4wxA2OsBMDXAZQRkQnOZOv649fHA9hLRGcB2A/gqwBOI6J5x8cPSTJpEesnANweXMcB/j8ZaeuvzcsuGlKU2UGIK+wUtlHdPW/xYgYYRMcykeoZYqosAKDaDwalQysDC+jWzzSeEJBKCwD09mSXqqpu3KZwCQk6zDfFYvv2L/rRbd48gJmzojE8TGiot+H++7Px7w/6MTx8au72IzwyC0QBi18KioMA7g/CcxhcVy3He/1dIrISUQeANgDZAM4FsAjAdsbY7uNfFx+/XgXwl+N/7oVzUfHzjLErAWjecFErMmkRydkp978muqTOoG9cWmjc99fEhBVgLCpIkYWVpAHqfGydqmNASB9T4KyyLD7N3XWcq3jszdvwzL/+GwDw0oc/w8/+fAve2fb8iWv+tfP32OPlL+3c3hCUvz+MKQHtS5LPUwM6/37kyMyjo7/u7lbR3+/Mw6xWjl07h1BY4MyNHQ7C22/14JprkmG18hN9gDk517qMlYLuzAvxrsspKCkscDinhYaD8Kx9AE4f/QJjLAlAAZwJx1ijj7lQ4dwdzAC8fLwiYyKiWURUefya4eNTTiAiB5wVmr8AuALA/2n5jWhJJi2iVfa8CeDPY18mgB5PS9l4eX5uZp9OETKdEA70DrKuXqO26DkC0i1VSzXTv7YDTHHbofijvW8hO9V5nmFLp/Pstf+++nnUWswYsvajZ6ATjW1VmDfFu/6BxHuL3V+lhcBNDwFAPk/36oRubzU3zZ1PhKGRr491OnDPPUdw6y2Hced3WrBoUSwWn+2cZfvb33pxwQWJiIlRUFwcBRBwyy3NmHNaDBISXOdWN+DlpVFkrQ7k9yAFzG8t5aYtQXrWhwDiGGM3AicWyz4J4CUAR+FZ24wPAXyNMZZ1fIw0xtgpn5WMsQQAyUT0TwB3wbnQNyTJdRGh4TsAlgPIAoAWva71urwcyzGdzuuzciLN4y+qOxKsCPnuv6oSNdiae7bbLdhd/e3Y17gNFy68Hv/Z82foFD3sDis4cTi4A4qiw7s7XsIlZ9zk1fO5eqwJoECf7AwgsGtaACCdEqaCMAyGU7Y/OxwOrFu3DqqqgnOOkpISlJeX46233sLRo0cxc+ZMnHvuuQCADRs2IDs7G7Nnzx4zRnTy4GDK5vj47jIAKJ4WjTVrjC5jueqqL4p7jDH85Kfu8ykFXPd9PDH0BH7ixXcthYA6AP8drIcRETHGvgrgd4yx/4GzyPDP4zHEA7jv+JTPzycYYz9j7KcA3mfOXaZ2AHcCaBxzaSKAvzHGYuCsztyt+TekEZm0hILKng5UJt8B4C/PpiR9/HRK8lwwFowGYGHt7rfVDcZOhPROoRE106/aDqa4jfUvW57GFYu/jWG7c0o5J7UIqQlZ+MVfbseZM85De08LCISCjBlePZ/bqpsBBCdpYYFd06JA0Rugq7ZDPSUJ1Ol0qKioQFRUFFRVxbp161Bc7Cww3XHHHVi3bh2Gh4dht9vR2tqK5ctd/y+pr1+QMGdO4JqcLsCu+fnUtKWFFS4J2EMkLRGAWyzlpqCu9SCiZgCXuXjLCox/Yj0RzRn159cBvO7imoRRfz4C5/RQyJNJS6io7Hnr2rUzf7UvOjpkM9xQctlWvvnsqtDe2jxCVQxDrblL3FZZzI2fIDE2FYWZM1Hd+sWyh6+V3Xniz8/+6ye4dtnd+L9dr6Klsxa
zjYtQVnKJ+xjstUE7/4aBBXxbbzLFdXawU/vkMcYQFeVcusM5h6o6dxnb7XYQEVRVhaIo+Oijj7BixYpxx+86ZpzPuVKnKDxgU2r34eFp36O1fWAsIMdMSJpaI1v1hwa5piWE7IuOfghAQM5WmUzm1/I9N3zET3d/ZWiomXbVp2CK20MK6yz7YG7cgvtfvQ7r/v0Iqlt34+UPv+hdtadhMwozZ8HmGMaRY/X41vn349PqD2Czu18TSGpHwJqyjcUQ2IW4AJDLU8dNjDjnePbZZ/HLX/4SxcXFmDJlCpKTk7FmzRqUlpbi2LFjzjFyJz6i6+jR4oD+W0zDsewv4QO/mkxKQdEE4Eeig5CcZKUlhJgrzN1zX557E4B/48Q+BGm03E5qWvUGz2eA6y5eIUZVDEOteWUeHSdw+Vm34PKzbgEAVLfuxoefv4GKc51T6KrqwHrzW7j9okfR1tsCsJETpQkO7sBE24KIDx4D1Kn+fSeeYyywa1oAwMjT0sxocvmeoii4/fbbMTw8jNdffx1tbW246KKLTrz/xz/+EZdeeik2btyIo0ePori4GIsWLTplnMYG02k5OYdsjE34n9cvN+G5sk20otbOomSjyNB1q6XcFKzjLyQ3ZKUlxJgrzP8BsFp0HKEofoh6nnhBdSiAkD4nvjg07UqPqizubNz3N5w58wJEGWKQn1YMEOHRP9+C4uzTEBedMOG9qr32ENwkwZwTnnp/E17Y5OyX9erWz/Dkexvxzz1VJ675YF8N9rZYPIh2/KRl2GHFpa98Gxe8uBLnPn8jntz0IgDge39/COe/eBMe27D2xLWrN7+M92o2uRwniydPBU3cUTomJgZFRUU4dOjQideqqqqQm5sLm82G9vZ2XH311dizZw/sdvsp99vtsRnDw4k7T3lDQzpw/Z1YfWonOilUrLGUm94XHYT0BVlpCU2rAFwAZ0dECYBOJfvqtWqdQcUC0bF4SlUMQy15S0t8uXdmngkz877YdVg+76oTf2aMYeV5P/V4LG6rGXJ3zaaaemQnJWDY7kBrt/Nn6D0XLsPT/9mCIZsddlVF07FunH+a+wXAygRJS7QuCq9fuxrxUXGwqw5c+eqdKJviXHP+wc0v4cpXv4teaz+G7MP4/MgB3FVW4XIcA/TxClgdB5205mRgYAA6nQ4xMTGw2+2or69HWZlzc5mqqti2bRuuu+46dHZ2nrhnZK2LwXBqX8KGBlN0SYnrxEkrZ2Dbghxq/cTC8s4O6IMkb+2Gc/uvFEJkpSUEmSvMVjhbLQfyXIuw8sgr6tbkwfBJWADgUPEVn4IpWaLj4OqRCStT3YNDOHCkDWdOdR4UrlMY7KrqbJDGCQpjeG9vNS6a4+GxRWz8hbiMMcRHxQEAHNwBB3eAyFmB4cRhV+3QMQVPbnoR95xz84SPSaTYU8o+/f39ePnll/HMM8/gueeeQ3FxMWbOdMa9fft2zJ8/HwaDAdnZzq3JzzzzDAoKChAT4/rw6I72ogWcs8OefeO+uw8PTQHRQKCfI3msD8A1QWoiJ3lBVlpClLnC/Pncl+eugrOZUES7/V11wzRLeGxtHqEq+uGW/GXCj1wgcgyDrBNmG3/bvR+XzivBsMMBAMhOSkRqXCxWf/AxFhblo6N/AAQgP9WzhsPuFuKqXMXFL9+Khq4WVCy8AmcXmvBe9UZ8+aVbcOVpF6Chy7mte072xElSDk+x9Sgn70DNzs7Gbbfd5vL6xYu/OJaKMYarrrrK5XVjvhvW0VF0KCurwXWjFo1koj33HKxfvwnlKwL5HMljt1jKTTWig5BOJZOWEGauMD819+W5iwFcLToWUc7bxbeW76Gwa7JXW/zVbZ70ZQk07miqBjBvvPf3tx5FQnQUjGnJONT2xZTJ5Qu+mJl8YdN2fO30ufj3/hq0dvdhZnYGFk8bv+WLuy3POkWH91a+iJ7hPtz69k9R1V6HyvNOHEqLlW/eh59feC9+s+UVHGirxTlTTsd1plNbVeT
ztKSD7o/u8ltD/cJZmZkNKmMI6PEBt+DZJZ/Q0noHMwRt0bTk0tOWctMbooOQXJPTQ6HvZgAHRAchQkkT7b/1PT6PhdlOKs701sP5y2aJjgMAVFt110TvN3R0YX9rGx79x3/w6tbPcKitA69t/ezE+3tbLChIS4bNocLS048blyzEzsYW2Bzj94/ztLlcckwizi4wYX3dthOvvVezCfNyZmHIPoSDHfV45ooH8Zd972HIxbbuXJ46xZPn+Mtqjc+1WeMCuiAXAPRwRN2G/+10f6UUQDsB/EB0ENL4ZNIS4swV5n44jwyPqC13md3U+sBrajoD4kTH4q1D0y7fBqbkiI4DALi9acL/fhfPm43/uexc/OTSL+H6xQswPSsD1y12Lh1SOcemmgasmDUNdlUd2WUNAkHl488AMSjjVlo6B7vRM+z8qzxkt2JT405MT3cehWJXHXhxx5u4/axvYMhhBTueqxIRbOqpu3tiEZXGKAilFgCNTeMWqzS1BB+fnkFt29xfKQVAD5zrWORawhAmk5YwYK4wHwRwk+g4giXWSn1PPaf2K4SAHowXCM4qywrveuwHCBERqN/nWDYfasTpU4yI0uuQm5wIIuCJ9zZiSnoqYqNO3WkzYqLpobb+Tnz9j3fh/BdvwqWvfBvLppyO86Y7O9m/vOstfG3ORYg1xKAkcxoIhPNeqMDpxjlIjnHdNDYOMQFfJAsARy3TFhKxo+6v9N8qPJgPIrc7viTN3WwpN9WJDkKaGCMKeMdtSSNzX577C0zyzowKJ/Xpp9Vd6f3jn6sRymqmXbmxueDckFiDwx1HD9n6Xp0e7OemRGXVXpi/MijN0v5t2LOhQdcelLVDpad9tD49/fCKYDzrt7h7/Va2NCjPkgAAv7aUm+T25jAgKy3h5b8B/Ed0EIF0/2vqx+GasHCmtzYbQ6PKAgCq7eAREc8N9CnPoxl5uuu9ygFQX7ewmAhB+S3vNvzvYh05XLf8lbT2KYAfig5C8oxMWsKIucKsArgWk/R8oooP1I2lzeG1tXm02uLLtoHpJj7QJoi4vT6gu13Gw9j4a1q0lsdTg3am0tBQcqHdHvOZ+yv9FwV7zM1Y40n7Yck/XXCuYzl10ZQUkmTSEmbMFeZ2AFfBeTT5pLHMzLdfvIPKRMfhK850tmZjechUWQCAeFeRiOcG45TnEUkUZwRhwh1SWmpunhO0ZmMr8J8z06hje7CeF4E4gBst5aZG0YFInpNJSxgyV5i3A/i+2wvDxLRWqr7zH3w2Q2D7YARS3dTLtoZSlYV47xGAB60KMdpEu4cCIQaGhmA960jrzNOJcCxYz1uFB7NANKl+QQkh91rKTf8QHYTkHZm0hClzhXktgBdFx+Gv1D5qe+QVNYEBrreHhAHOdLamgi8FfcHrRFTboQZRzw7GKc+jpfPEoB04SKSL6unOMQfreXloLVqET7cG63kR5GlLuelXooOQvCeTlvB2J4AdooPwVZSdBlevVTt0hDzRsfijbuql28B0IfU9qPYaYXP0wa605PG0oHb2rqtbFNCW/mPdiV+foZDaEsxnTnLvAvgv0UFIvpFJSxgzV5iHAXwFQPjtMiCiJ59X98TaUCo6FH8417KcW+z+yuAix1FhPW7YBAcmBkI+TwtqI7+BgbRpDntU0Kot0bDG3YgXJuXiewF2A7jWUm7yqGuzFHpk0hLmzBXmIwC+DKBbdCzeWPUG35jdjcXurwxt9VMu2UaKTsjakfEQWfsAh7DpqmAuxAWANEqYAkJQm7EdbinpCebzzsd7i5OpO+BHCUxyLQAutZSb+kUHIvlOJi2TgLnCvB/OVv9h0X76mo3qpgV1FLZbm0dwptibCs4LucPtuL2hBgIXNQd7ekgB0xmgC2on05bDpQuJELS1NACwCg+mgUhuzfVNP4BLLOUmOc0W5mTSMkmYK36XU0sAAB+GSURBVMzrAawEgtP8yldnHuSfXbWZwr7CAgD1Uy7ZSoouqOsbPKHaDgo
9pyrYC3EBIIXig7ajBwA418f19WXsDuYzC9A0dR52bwnmMycJFc5eLJ+LDkTyn0xaJhFzhfk1OLvmhqTCNqq75y1ezIDxD64JE84qy/khV2UBAO5oSRb5fCXIlRYAyOWpQX9mfd2izGA/8/t4YpFCqpBOx2Hse5Zy079EByFpQyYtk4y5wvwYgKdFxzFW0gB1PrZO1TFA6A9UrTQUXRySVRYi7gANzRQZQ7AX4gKAkaelB/uZvb1ZJaqqrwrmM2MxnHAtfl8fzGeGuacs5aZnRAchaUcmLZPT9wH8SXQQI/QOsq5eq7boOYR0aNUaZ4qjsfCCKaLjcIUcLdUA4kTGEOw1LQCQxZOLQQj6jpAjrTPbgv3MS/D3JQnUG9SpqTD1NuSZQpOOTFomIXOFmQO4EcD7omMBgF+sU3ckDGOe6Di00lD05a2k6ApEx+GKaq9uFx1DsHcPAYAeulgdlKBXIJqa5pqIMBjs5/4YD8eDyBHs54aR9QCut5Sbgr6+SgosmbRMUuYKsx3AlQC2iYzjrrfVDQUdCNszhcZyVlkuDNmKEbc3RIuOIZgHJo6WRLFHg/1MVY1KGhhIDcohiqMVo25GKfZuDvZzw8QmOLc2B3UbvBQcMmmZxMwV5gEAFwPYL+L5l23lm5dUhf/W5tEaCy8K2SoLABDvnSY6BhHTQwCQzVOEVB4a6hcIOYLibjy+gBEXXlkLMZ8AuNhSbhoQHYgUGDJpmeTMFeZjAC4E0BDM586v5Xtu+IifHsxnBhpniqOh6MKQTVi42tkIUNB3tIzFGBPyXCNPSxLx3K6u/HmcK7XBfm4cBpO+hj8dDPZzQ9h2ABfJ5nGTm0xaIoC5wnwYwHIAQflgze2kplVv8HwGCJ+q0FJj4YVbSdGH7tSQreaw6BgAgCH4fVoAIIenCtuCftQyXch/+8vxl7I46t8j4tkhZheACyzlpqA2/JOCTyYtEcJcYW4CsAxAQH8zix+inideUB0KEPQtqIFEYGpD0UUhW2UBANV+KCQaCypMzMdKDAwpjJiQ5KGxcf4couB3pGYA+xEejQZRJJ+l8zmA8y3lprA6ykTyjUxaIoi5wtwKYAWAfYEYX6eSffVatc6gIuQOEPRXqFdZAIDUzpA4A0nUmhYAiKdoIW3a7faY9KGhRCFnA81A9ayZOBipi3L3AjjPUm4KakdkSRyZtEQYc4XZAqAcgOYl5UdeUbcmD2KB1uOKRmBq/ZQvh1wjudGID3YCakh06BWx5XlEFiUJ2zHS2LBA2HToPfj5PEa8U9TzBTkA4FxLualDdCBS8MikJUgYYz9hjO1jjO1hjO1mjJ3FGFvPGDvIGPucMbaZMTYrGLGYK8ztcCYumv1mePu76oZpFpyj1XihpLHwgpCvsqj22kOiYxghaiEuAOTzdGGN9To6ChdwzppEPDsB/SmX4y8HRDxbkGoAX7KUm4Le3E8SSyYtQcAYOxvApQAWEtE8AOcBaD7+9vVENB/AywB+GayYju8qOhca9HE5bxffWr6HlvkfVehxVlkuDolpl4lwW/Ww6BhGiJweylNTBa47YqyjfYqwFvtfw5/KYmkwIFO/IaYWzoTFIjoQKfhk0hIcuQA6iMgKAETUQUStY67ZCGB6MIMyV5h7AJwPwOf58JIm2n/re3weA8T9eh1ATQXnbSVFP0V0HO5w9UjILHxmghbiAkAiYnNBEDZNUl+/YBYJOE4AcC7KvQc/ZyCazF1g9wFYYSk3CVm7JIknk5bgeB9AAWOsmjH2O8aYq4ZrlwEwBzkumCvMfXD2cVnv7b2Z3dT6wGtqOhN81k2gEJhaN/WSPNFxuENkHwLZhB6SOJrINS0AEIuoBlHPttnic6zWeCELcgGgBPtLp6J2si7K3QRgqaXcFBJb+yUxZNISBETUD2ARgG8DaAfwOmPspuNvv8oY2w2gDMC9IuIb1Tn3A0/vibVS31PPqf0KITtwkYnVVHDuVlIMIbG4dSLc3lwNIEp0HCOY4I+
VdJ7YJ/L5TY3zhFYdf4RHS0E02bb/vgVnH5bJ9n1JXpJJS5AQkUpE64noAQDfBXDV8beuJyITEV1BRM0TDBFQ5grzEICvAHjX3bUKJ/WptWpVtAMh89u91giM1029NFd0HJ5Q7QdD6oOcMbGVlnyeJjSBO3q0eCEROyLq+UnoTb8Y70ymhnO/A3C1pdwUMuu2JHFk0hIEjLFZjLEZo14yAWgUFc94zBXmYQBXAHh+ouvuf03dnN6PM4ITlRhNBV/aSoohLPrNcHtTvOgYRlMEf6zk8zTByaai6+w0Cm2v/w38fmk0DVeJjEEjP7WUm+6UpzVLI2TSEhwJAF5mjO1njO0BUAqgUmxIrpkrzA5zhflWAD8BcMpvzBX/VjeUNmNS7hQa4ayyXJYjOg5PEBEHDcxwf2XwiK60pFJ8EQhCD8yrr1s4nejUfz/BooCUu/C4HUQh0SXZBw4AN1vKTY+KDkQKLTJpCQIi2klES4iolIjmEdGVx3cQrSCiHaLjc8VcYf4ZgBuAL1qTn7OX77h4Oy0VF1VwNBvLw6bKQqqlFkCy6DhGE72mhYEpUdDXiYxheDjJaLfH7BIZwzx8PrcQjeG4KHcQwOWWctM60YFIoUcmLdK4zBXm1wBcAKBrWitVf/fvfBYDdKLjCiRnleUrYbO4WLXVCFs7MR7RlRYASKH4LtExNDfNCfpZRGP9CA/PAlGP6Di80AFnD5Z/ig5ECk0yaZEmZK4wbwBQVvmq2smARNHxBFqzccVWrjNMEx2Hp7i9Ti86hrFEV1oAII+nCu8bdOTIzEUksGcMAKSiO/N8/Gu3yBi8UA+gzFJu8rvhpTR5if90kUKeucJ8INqBy+BsgDdpHa+yZImOwxvEu0LueAEWAn0GjWp6hugYiHRR3d25e0XHcSPWLY0ia43oONzYDmCJpdxULToQKbTJpEXySEnVgU44jx94UXQsgXI4f/k2rosKaldif3C1pxWgkDtiQOTZQyMyKakYBIfoOOrrFgk8VsBJAdd9D08Oio5jAs8DOEe25Zc8IZMWyWMlVQfsJVUHvgXgHgCTagsiAVRbfLnw3869we2HGkTH4EooTA/poETroAhdjAsAAwOpxXZ7lPCeKQuxc34eHd4iOo4xrAButZSbbrWUm6yig5HCg/hPFynslFQdeArOYwd6RceileNVlpDaOuyOaq8RXklwJRQW4gJAMsWFxAnALYdLQ+LfyX14qBhEQrsFj9IMZ3Vlwp5QkjSWTFokn5RUHfgngIUAPhMdi7+cVZYrQubAQU+Roy0ke8kwKOLnhwDk8BQhBxeO1dJSsogIwnfwpKMzZwU+FLoN+7j/AFhkKTdtFx2IFH5k0iL5rKTqQC2AswE8IzoWf7TkLQu7KguRtRdwhOT6m1BYiAsARp6eIjoGAOBcH9vXm/m56DgAYCXWLjGQrVZgCI/DeYZQu8AYpDAmkxbJLyVVB6wlVQe+A+DrCMPpIgLo0LSvpomOw1vcXl+NEP33GwoLcQEgh6dMhcCutKPV1S0Kid4/eqiGO/AbEVWffjjPD/qxpdwUEhUwKTyF5IeeFH5Kqg68AedJ1mE1XdSSd842rosKu4MfVVu10Db1E2FQQiJRiII+SQETdgjpaH19mbMcDv0B0XEAwFn4ZGEWWbYG8ZEHAZxpKTe9GcRnSpOUTFokzZRUHTgE53TRs6Jj8cTxKkuq6Dh8wR0tIdW6fzQWKqUWAPEU0yo6hhFHWmeFzJTIKjxYCKJgbIN+G86EJSQSNin8yaRF0tTx6aI7AHwDQKjsVHCpNXfpp1wXPUt0HN4iUu2goZCtDoXClucRWZQ0LDqGEc3NcxeQ4IMcR2ShLa8MGwO5EHYIwF0ArrKUm8Ju2lgKXaHz6SJNKiVVB/4E53RRSCxAHIsAqpl+ZUgs1PQWd7RUA4gTHcd4QmUhLgAY1fR40TGMUFVD4sBAashMn96K3y3Wkb0xAENvB7DQUm76taXcFBJThdLkIZMWKWBKqg7
UAFgMYI3oWMZqzS3bHo5VFgDgtuoO0TFMJIRmh5DHUwtFxzBafd3CkJnWM8AR/W38TsspKweAB+Bsx1+l4biSdIJMWqSAKqk6MFxSdeB2AJcACJn1BTXTr0oSHYOvuKMhRnQMEwml6aF4xGSDEDJrSbq78+aqqu6Q6DhGLMXG09Op/VMNhjoAYLGl3PSQpdwUkk0PpckhdD5dpEnteDO6OQD+IDqW1pyzP+W66Nmi4/AV8d5i0TFMhCGESi0A4hAViCkQnx09Oq3l/9u7++Cq6juP4+9vEiBQUECeiSsoIldRA2pt69DKzbZahxa7tQWm7drtdnfSTR92yna7fVg3bmes0yltV82Y3Xa12n0Qa61rsdIHoiMqMwoKpXojGB6qlYAgxYJIknu++8c5qZdAnnPvOSd8XjN3knvuOed+EyD58HuMu4ZCX+HG6bgPdOyPA98l7A7aNIRliZyUQouUTKY5dzDTnPsE8CFgb1x1bD/3I+Pieu/BCvL7dwGT466jJ0nqHgKYFJx2OO4aCu3edfGF7iRmr53p7Dnz7WwYyBTo3UC2dXH1F1sXVydmwLMMbwotUnKZ5twDhK0u95T6vV+Z9o6n8uWjMqV+36EStG1/Oe4aepes0DIjmDgq7hoKdXRUTjx69LREtUp8hlveUe4d/VnT5k7gwtbF1Y8WqSSRk1JokVhkmnP7M825FYQbL5bsF/H2OeltZQHIt8e5AnvfJGn2EMDMYML0uGvoatfOBaPjrqHQSNor/4rv92XM2V5gaevi6k+1Lq5O9JIGMjwptEisMs25NcAFhAvSFXV65J5plz+dr6hMbSsLgOcPzIy7ht6YJWPDxE7j/W1n4claM+jAgTOrg6AsUWNtFvPryyf4axu7edkJW1fmty6ufrCEZYkcR6FFYpdpzr0eLUh3JbCtWO+zbc5HE7Nmx0B4cGQ/5GfHXUdvkjYQ1zAbScXOuOs4ntmrr85KWE3wT9w4Cfe2Loe3Aoui1pVET7eX4U+hRRIj05x7DLgI+BrhBmtDZs/Uy5/OV1SeP5T3LLV8+4vJ7xsied1DABN97MG4a+hq184F57uTqOnBVbw8awEbN0RPDwMrCWcGPRFjWSJ/otAiiRJtA3ATMBe4myHqMtp2brpbWQCCtu2pmKGRxNAyIz+hPO4aumprGzPl2LG3PRN3HV19ju9eWulH7wTmtS6u/o7WXZEkUWiRRMo05/ZkmnPXE66ou6G383vSOvWyjWlvZQEIOlonxV1DXyRpw8ROM4MzEjlNfPeui5P2vfrNKI59YFf2nZ9qXVydqPVkREChRRIu05x7CrgC+DgwoB+iL5y7LFEzNQbCvf0otCV2k8TjJS+0TPZxZ+N0HasRu337Zi90tz1x1wHsB/4OWFiTbXkk7mJEuqPQIomXac55pjn338B5wDcId5Dtk9Ypl27MV4y+oGjFlUjQvnsbMCLuOvoiid1DZZSNqKBsR9x1nKis/MD+M4s2+LwPOoB/A86tybbcXpNtycdYi0ivFFokNTLNuSOZ5twNQAa4ty/XvDB3eaL36emrfPu2xA0k7U4Su4cATve3JWYPokI7dy48x50ghrf+BXBRTbbl72uyLX+I4f1F+k2hRVIn05zbnWnOLQPeDXS3rgR7p1yyKV8xen7pKiueoP2lsXHX0HfJDC3TgvFFXQdooN58c1xVe9voUg7I3Qi8vybbcnVNtiVXwvcVGTSFFkmtTHNufaY5dxmwFNjS9fUX5i4fWfqqhp67B/iRc+Ouo6+Stk5Lp6pg4vi4a+jO716aX4oZOpuBpTXZlstqsi1rS/B+IkNOoUVSL9OcexBYAFwHPAewd/LCTR0VYy6MtbAh4vnWF4HT466jr5IaWqYG42fjxV11eaBa98y9xJ1idV9tBT5MOMhWq9lKqim0yLAQDdb9CeHidCu2z/lwbLtID7V827bWuGvol0RGFhhJxbgyLFFL53dyLxvxh4PTnx/i2z4PfBS4uCbbcn9NtiW
RgU2kPxRaBDPLm9lmM9tiZs+Y2bui42VmdouZ/dbMtprZ02Y2O3ptV3Rsi5n90symxftVhDLNuSDTnLunbdT4DxD+wD6h2yhtgvadFXHX0B9JbWkBGOuVSZhefFI7dlzyZ0N0qy3AcuDCmmzLjxVWZDhJ1Q9DKZqj7l4NYGZXAd8E3gMsA2YAF7l7YGZVwJGC6xa7+34zuwn4KvD5EtfdrbrGbAD8uKG26T7CnaS/DlwWb1UD48HBWXHX0B9JDi1Tffyx1/s+Y76k3nhjwuz29pFbRoxou3iAt3gcuKkm2/LwUNYlkiQKLdLVaUDn9NrpwB53DwDc/eVurnmMBAWWQnWNWQceBB5sqG16H/BF4H0kthPjeEH+0CvgM+Kuo3+SG1qq8hPHbi9PbGMLL798weHZs5/t72U/B75Zk215vAgliSSKQosAjDazzUAlYVDJRsfvBR43s0XAOuC/3P1kP1GXEA72S7S6xuwvgV821DadB3wWuB4YF29VPQvat+8ibO1KjaSu0wIwPZhwVtw19OSV389bOGvWs4fMeh14nQd+DNxck21JfReoSF9pTItA1D3k7vOAq4G7zcyilpXzgK8AAbDOzGoKrnskCjunEXYppUJdY/aFusbs54Aq4AvA9phL6la+7cUUblaX3NAyhlGTzUnswOYgqBj9+utTNvdwyl7gJuCcmmzLCgUWOdWopUWO4+4bzGwSMBnY5+7HgIeBh81sL3AtYasLRGNaYip10Ooas68DtzTUNt0KXEXYxXU1Ceo68vy+RAxw7g9L+H+GRjPqpTc4ltjv644dl0xfsOCEYSmPArcDP63JtrSXvCiRhFBokeOY2TygHDhgZguBVnd/xczKCKcT/ybWAosgGveyFljbUNt0LmHX0ScJW5Bi48Gbh6BjTpw1DExyW1oAJgfjjuwuPxZ3Gd06/MdJczs6Kp6vqOiYAdwFNNZkW5rjrkskCRRaBN4a0wJhK8P17p43synA981sVPTaU8BtsVRYInWN2e3AFxpqm74GfIIwvLw9jlqC9p3bgUvjeO/BSPLsIYCZwRmVu8sT3UD4xK5dC26dM+fpB2uyLcmc6iQSE4UWwd3Luzm+lrAF4mSvzSpmTXGra8weJmyOv72htikD/CVhiJlZqhry7dsOl+q9TiUzgglJHNjcStiqckd9fX2cuz6LJJpCi0gv6hqzOeArUetLDWGAuRYo6iaGQcfvJxTz/sUSdSUm1uk+5kycQ/Q+Q6fYDgEPAKuBX9XX16dw0LVIaSm0iPRRtGDdr4BfNdQ2jSZctG4F8H5gVE/X9pd7vh1/c+5Q3rOEEt09ZJiNYsTOY7RXx/D2h4GfAfcAv6ivr0/u4BqRBFJoERmAusbsUcJ1bO5tqG0aD/wF4bYBVzIEASboeHkbcMFg7xOPZLe0AEwMxh7aU36w9xOHxlHgIcIWlYfq6+s1TkVkgBRaRAaprjH7B+AO4I6G2qaxwJ8TLrh3DeFiff0WtG1L9EjRnljCW1oAZgYTyoscWg4Rjgf7P+Bn9fX1Gp8kMgQUWkSGUDSA9wHggYbaJgMWEgaYJcAl9PEXetCxu7JoRRZdsmcPAcwMzpi6kR1DfdvtwBrC7p/1GqMiMvQUWkSKJFr/ZVP0uLGhtmkaYevLEuC99DCQ14PXzylJkUWQ9MXlAM7wsbNx3sQYTDg8DDxCtMZPfX39kKcgETmeQotIidQ1Zlt5qxtpJLCIcDft9wCXE42FCfL7dwKz46pz8JLf0lJGWUUF5ds7yGf6cdkRYAOwnnCT0Cfr6+vbilJgCZlZnnDvMCPc0+iz7v6kmc0C1rj7fDO7krCrawcwhnA7gW+5+5pYipZTlkKLSAzqGrNthNshrANoqG0aRRhc3h20tZxJOCsp0Zs5dsuS39ICMN7H7N9vf+zplNeAxwkDynrgmWHa5XPU3asBzOwqwn3E3nOS89a7+5LovGrgATM76u7rTnKuSFEotIgkQF1j9hjhL8fHIMuqZUs
+Qzh76F3AO6NHSqZAJ7+lBWB6MMH3l/0ptATAC4RdeU8S/lk8X19f7zGVF5fTgF5HKLv7ZjP7V8ItLxRapGQUWkQSaOXqNQFhk/1W4N8BVi1bMgG4sMtjPjHvkdRVCsa0tAG5quCMTVv53f2EQWXzKTzDp3Mbj0rC2W7ZPl73DPClolUlchIKLSIpsXL1moP8qTXmLauWLTmLE8PMecCIUtcYSUpoeYNwDMaLhDN7fgNsAZqrbl7UXgVcztIYy0uMwu6hdwJ3m9n8PlyXihY1GV4UWkRSbuXqNbuB3YTTbQFYtWzJSGAO4YDezsfZ0cezgPHFq6ik3UN/JAwlLdHHwscrVTcvOtW6dwbF3TeY2SRgch9OXwDkilySyHEUWkSGoZWr17QBz0ePE6xatmQsUNXlMQ2YQBhoxnf5fHQ/3n6wLS1twKvAvuhj52NP10fVzYteG+R7SQEzmweUAwcIZwl1d95FwD8Dny5RaSKAQovIKWnl6jWHgebo0atVy5aM4sQgM5awC6oi+jgCqOgI2oKR5ZUju9wiIFzO/ghht80b3X1edfMi7cdTWp1jWiDs8rne3fNmVgEU/lksMrNnCcPMPuDzmjkkpWbuaj0VEZHjmdlS4GPu/tG4axHppJYWERE5TjSdeSnwyZhLETlOUkb5i/SLmR02s1lmdtTMnjWznJk9ZWbXdznvWjO7Ifr8h2Z2Xdf7RB8nm9na0n0FIsnl7je4+8Xu/mzctYgUUkuLpF2Luy8AMLOzgfvNrMzd74xe/0fgg73dxN1fNbM9ZnaFuz9RxHpFRGSA1NIiw4a77wC+CHwewMzmAsfcfX8fb/EA8LEilSciIoOk0CLDzTPAvOjzK6LnfbWRcBNDERFJIIUWGW4KFzabTri+R6eTTZUrPLYPmFGMokREZPAUWmS4KVyl8yjhfiqdDhCuMwKAmU0ECruOKqNrRIacmU0zs3vMrMXMnjezn5vZ3Ggw+eboWKOZlUWDzN3MvlFw/SQzazez2+L8OkTipNAiw4aZzQK+DdwaHcoRLmXf6VFgmZl1Lnz2SeCRgtfnAr8tZo1yajIzA34KPOru57j7+cBXgamEg8mrgYuA84Fro8t2AEsKbvMR4LnSVS2SPAotkjpdVuo8p3PKM3AvcGvBzKHHgAXRLwzcfQ2wHtgUrQB6BfDlglsvBh4qxdcgp5zFQLu7N3YecPfNwEsFzzuAJ3kraB8FcmZ2afR8GeHfcZFTlqY8SxpdQPi/0130sCeOu79hZr8GaoBfR8duBG7s5pIPgrb9laKYD2zq6QQzG0P4d/WGgsP3AMvNrBXIA6+gcVdyClNLi6SKmdUC/wt8vY+X3EQPG78V3Hcy8B13PziI8kQG4pyo5e8J4CF3f7jgtbXAe4EVwOo4ihNJErW0SKpEzeuNvZ741vl7gQf7cN6rhOu0iBTDc8B13bzWOablBO7eZmabgJWELYwfKFJ9IqmglhYRkeJrAkaZ2d90HjCzy4Cz+nDtKuDL7n6gWMWJpIVCi0gCmdmHoimv86Lns8zst2Z2VTQ9dnO0/9IL0ed3R+e93cwei443m9kPorESEiN3d+BDwHujKc/PAfWEY1R6u/Y5d7+ryCWKpIKF/5ZEJEnM7F7CxfHWuXt9NJ17jbvPLzjnUeAf3H1j9Hwq8BSw3N03RLOmPgysj7rJhgUzywNbgRFAB3AX8D13D8zsSsJp7J929/+Mzl9AuDLyl9z922b2Q8Lv5X3RWj3rgFsKZp2JSEKppUUkYcxsLOF07L8Glvfj0jrgLnffAOH/7t39vuEUWCJH3b3a3S8gHKR6DfAvBa9vJZwe3Gk5sKXrTczsdOAXwH8osIikg0KLSPJcC6x1923Aa2a2sI/X9Tqtdrhx933A3wKf7VyPB/gdUGlmU6NjVwMPd7l0bHTsf9z99pIVLCKDotAikjwrCNfnIPq4IsZaEi/a3bsMmFJw+D7CFWTfRdg1dKzLZd8BHnf375akSBEZEgotIgl
iZmcAWeAHZrYL+BJhV4f1dF3kOeCS4lWXaF2/P/cShpYVhOv6dNUELDWzKSd5TUQSSqFFJFmuA+5297PcfZa7nwnsBKr6cO1twPVmdnnnATP7uJlNK1KtiWBmZxOuFruv85i7twLthGNe1p3ksnuA24Gfm9m4UtQpIoOnxeVEkmUFcHOXYz8h3FyvR+6+18yWA9+OWhACwv2X7h/yKhMiWsm4EbjN3f2tYS1AuBz+FHfPdzkOgLt/z8ymAz81s2vcva0kRYvIgCm0iCSIu195kmO3ALf08dwNwKJi1JYgo6Nl7zunPP+IcIzKcdz9yd5u5O5fNrM7gR+Z2Qp3D4a8WhEZMlqnRURERFJBY1pEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFRRaREREJBUUWkRERCQVFFpEREQkFf4frrkmUjSxFekAAAAASUVORK5CYII=\n" + }, + "metadata": {} + } + ], + "source": [ + "# Candidates with 1st position in their respective constituiency\n", + "all_winners = candidate_2009[candidate_2009.Position ==1].Party_Abbreviation.value_counts()\n", + "top_10_winners = all_winners[:9] \n", + "# count of other regional parties\n", + "top_10_winners['Others'] = all_winners.sum() - top_10_winners.sum()\n", + "# Pie chart\n", + "top_10_winners.plot.pie(autopct='%.f%%', \n", + " figsize=(10,10), \n", + " title='Top 10 parties with majority seats')\n", + "plt.legend(loc='upper right')\n", + "plt.ylabel('')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insight: INC have won almost 38% of the total seats followed by BJP with 21% seats." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task 10 : Plot a pie diagram for top 10 states with most number of seats" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "# Top 9 states with maximum number of seats\n", + "top_10_seats = electors_2009.STATE.value_counts()[:9]\n", + "\n", + "# Sum of other states\n", + "top_10_seats['Others'] = electors_2009.STATE.value_counts().sum() - top_10_seats.sum()\n", + "\n", + "# Function to convert percentages into actual values\n", + "def autopct_format(values):\n", + " def my_format(pct):\n", + " total = sum(values)\n", + " val = int(round(pct*total/100.0))\n", + " return '{val:d} ({pct:.0f}%)'.format(val=val,pct=pct)\n", + " return my_format\n", + "\n", + "# PLotting the pie chart\n", + 
"top_10_seats.plot.pie(autopct=autopct_format(top_10_seats.values), \n", + " figsize=(10,10), \n", + " title=\"Top 10 States with most number of seats\")\n", + "plt.ylabel('')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Insight: Uttar Pradesh has the highest number of seats (80 or 15%) followed by Maharashtra (48 or 9%).\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0-final" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/sales_campaign_analysis/notebook/Manipulating data with Numpy - Code Walkthrough-MK.ipynb b/sales_campaign_analysis/notebook/Manipulating data with Numpy - Code Walkthrough-MK.ipynb new file mode 100644 index 0000000..da6b267 --- /dev/null +++ b/sales_campaign_analysis/notebook/Manipulating data with Numpy - Code Walkthrough-MK.ipynb @@ -0,0 +1,1515 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sales Campaign analysis\n", + "\n", + "__An introduction to the Facebook advertising platform__
\n", + "Along with Google's search and display networks, Facebook is one of the big players when it comes to online advertising. As Facebook users interact with the platform, adding demographic information, liking particular pages and commenting on specific posts, Facebook builds a profile of that user based on who they are and what they're interested in.
\n", + "This fact makes Facebook very attractive for advertisers. Advertisers can create Facebook adverts, then create an 'Audience' for that advert or group of adverts. Audiences can be built from a range of attributes including gender, age, location and interests. This specific targeting means advertisers can tailor content appropriately for a specific audience, even if the product being marketed is the same.
\n", + "\n", + "__What do we need from our Facebook ads analysis?__
\n", + "When it comes to analysing the Facebook adverts dataset, there are a lot of questions we can ask, and a lot of insight we can generate. However, from a business perspective we want to ask questions that will give us answers we can use to improve business performance.
\n", + "Without knowing anything of the company's marketing strategy or campaign objectives, we do not know which key performance indicators (KPIs) are the most important. For example, a new company may be focussed on brand awareness and may want to maximise the amount of impressions, being less concerned about how well these adverts perform in terms of generating clicks and revenue. Another company may simply want to maximise the amount of revenue, while minimising the amount it spends on advertising.
\n", + "As these two objectives are very different, it is important to work with the client to understand exactly what they are hoping to achieve from their marketing campaigns before beginning any analysis in order to ensure that our conclusions are relevant and, in particular, actionable. There's not much point in producing a report full of insight, if there's nothing the client can do about it.\n", + "\n", + "\n", + "\n", + "__Understanding the dataset__
\n", + "The data used in this project is from an anonymous organisation’s social media ad campaign. The data contains 1143 observations in 11 variables. Below are the descriptions of the variables. Since you are working with numpy, refer to the `Feature Index` column for the indices of every feature.\n", + "\n", + "|Feature Index|Features|Description|\n", + "|----|----|----|\n", + "|0|ad_id| unique ID for each ad|\n", + "|1|xyz_campaign_id| an ID associated with each ad campaign of XYZ company|\n", + "|2|fb_campaign_id| an ID associated with how Facebook tracks each campaign|\n", + "|3|age| age of the person to whom the ad is shown|\n", + "|4|gender| gender of the person to whom the ad is shown|\n", + "|5|interest| a code specifying the category to which the person’s interest belongs (interests are as mentioned in the person’s Facebook public profile)|\n", + "|6|Impressions| the number of times the ad was shown|\n", + "|7|Clicks| number of clicks on that ad|\n", + "|8|Spent| Amount paid by company xyz to Facebook, to show that ad|\n", + "|9|Total conversion| Total number of people who enquired about the product after seeing the ad|\n", + "|10|Approved conversion| Total number of people who bought the product after seeing the ad|" + ] + }, + { + "attachments": { + "NumPy_code_along.PNG": { + "image/png": "" + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below is a snapshot of the data you will be working with![NumPy_code_along.PNG](attachment:NumPy_code_along.PNG)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Command to display all the columns of a numpy array\n", + "np.set_printoptions(threshold=sys.maxsize)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Let's load the data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ad_idxyz_campaign_idfb_campaign_idagegenderinterestImpressionsClicksSpentTotal_ConversionApproved_Conversion
070874691610391630-34M15735011.4321
170874991610391730-34M161786121.8220
270877191610392030-34M2069300.0010
370881591610392830-34M28425911.2510
470881891610392830-34M28413311.2911
\n", + "
" + ], + "text/plain": [ + " ad_id xyz_campaign_id fb_campaign_id age gender interest \\\n", + "0 708746 916 103916 30-34 M 15 \n", + "1 708749 916 103917 30-34 M 16 \n", + "2 708771 916 103920 30-34 M 20 \n", + "3 708815 916 103928 30-34 M 28 \n", + "4 708818 916 103928 30-34 M 28 \n", + "\n", + " Impressions Clicks Spent Total_Conversion Approved_Conversion \n", + "0 7350 1 1.43 2 1 \n", + "1 17861 2 1.82 2 0 \n", + "2 693 0 0.00 1 0 \n", + "3 4259 1 1.25 1 0 \n", + "4 4133 1 1.29 1 1 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import csv\n", + "# Load the data\n", + "conversion_data = pd.read_csv('../data/KAG_conversion_data.csv')\n", + "\n", + "# Remove the header\n", + "\n", + "# Convert the data into a numpy array and store it in sales_data\n", + "conversion_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Let's delve into the data to find the answers to some questions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How many unique ad campaigns (xyz_campaign_id) does this data contain ? And for how many times was each campaign run ?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Total number of campaigns" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The total number of campaigns are: 3\n", + "They are: [ 916 936 1178]\n" + ] + } + ], + "source": [ + "print('The total number of campaigns are:',conversion_data.xyz_campaign_id.nunique())\n", + "print('They are:',conversion_data.xyz_campaign_id.unique())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Total times each campaign ran" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of time each campaign ran is given as: {916: 54, 936: 464, 1178: 625}\n" + ] + } + ], + "source": [ + "print('The number of time each campaign ran is given as:',\n", + " conversion_data.xyz_campaign_id.value_counts(sort=False).to_dict())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What are the age groups that were targeted through these ad campaigns ?" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The age groups targeted are: ['30-34' '35-39' '40-44' '45-49']\n" + ] + } + ], + "source": [ + "# Age groups are categorized as bins. So get a unique count of the bin\n", + "print('The age groups targeted are:',conversion_data.age.unique())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What was the average, minimum and maximum amount spent on the ads ?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mean 51.360656\n", + "min 0.000000\n", + "max 639.949998\n", + "Name: Spent, dtype: float64" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conversion_data.Spent.agg(['mean','min','max'])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Minimum amt spent on ads was 0.0\n", + "Maximum amt spent on ads was 639.95\n", + "Average amt spent on ads was 51.36\n" + ] + } + ], + "source": [ + "avg_amt, min_amt, max_amt = conversion_data.Spent.describe()[['mean','min','max']].round(2)\n", + "print('Minimum amt spent on ads was ',min_amt)\n", + "print('Maximum amt spent on ads was ',max_amt)\n", + "print('Average amt spent on ads was ',avg_amt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What is the id of the ad having the maximum number of clicks ?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What were the maximum number of clicks" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The maximum number of clicks were 421\n" + ] + } + ], + "source": [ + "print('The maximum number of clicks were',conversion_data.Clicks.max())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Which was the ad having the maximum number of clicks" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The advertisement with the maximum number of clicks was the one with id 1121814\n" + ] + } + ], + "source": [ + "print('The advertisement with the maximum number of clicks was the one with id',\n", + " conversion_data.ad_id[conversion_data.Clicks.argmax()])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How many people bought the product after seeing the ad with most clicks ? Is that the maximum number of purchases in this dataset ?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of people who bought the product having maximum ad clicks is 13\n", + "The maximum number of purchases was 21\n" + ] + } + ], + "source": [ + "# Max value of the Approved_Conversion column\n", + "print('Number of people who bought the product having maximum ad clicks is',\n", + " conversion_data.Approved_Conversion[conversion_data.Clicks.argmax()])\n", + "\n", + "if conversion_data.Approved_Conversion[conversion_data.Clicks.argmax()] == \\\n", + " conversion_data.Approved_Conversion.max():\n", + " print('The maximum sales were on this product')\n", + "else:\n", + " print('The maximum number of purchases was',\n", + " conversion_data.Approved_Conversion.max())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## So the ad with the most clicks didn't fetch the maximum number of purchases. Let's find the details of the product having maximum number of purchases" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The ad id for the product having maximum number of purchases is: 1121104\n", + "The record for this product is as shown below\n", + "ad_id 1121104\n", + "xyz_campaign_id 1178\n", + "fb_campaign_id 144533\n", + "age 30-34\n", + "gender M\n", + "interest 16\n", + "Impressions 2080666\n", + "Clicks 202\n", + "Spent 360.15\n", + "Total_Conversion 40\n", + "Approved_Conversion 21\n", + "Name: 528, dtype: object\n" + ] + } + ], + "source": [ + "print(\"The ad id for the product having maximum number of purchases is:\",\n", + " conversion_data.ad_id[conversion_data.Approved_Conversion.argmax()])\n", + "print('The record for this product is as shown below')\n", + "print(conversion_data.iloc[conversion_data.Approved_Conversion.argmax()])" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## Creating additional features\n", + "\n", + "Let's add some additional features that will represent some additional standard metrics." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Click Through Rate (CTR)\n", + "This is the percentage of how many of our impressions became clicks. A high CTR is often seen as a sign of good creative being presented to a relevant audience. A low click through rate is suggestive of less-than-engaging adverts (design and / or messaging) and / or presentation of adverts to an inappropriate audience. What is seen as a good CTR will depend on the type of advert (website banner, Google Shopping ad, search network test ad etc.) and can vary across sectors, but 2% would be a reasonable benchmark.\n", + "\n", + "### Create a new feature `Click Through Rate` (CTR) and then concatenate it to the original numpy array \n", + "\n", + "CTR = $\\frac{Clicks}{Impressions}$x100" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ad_idxyz_campaign_idfb_campaign_idagegenderinterestImpressionsClicksSpentTotal_ConversionApproved_ConversionCTR
070874691610391630-34M15735011.43210.013605
170874991610391730-34M161786121.82200.011198
270877191610392030-34M2069300.00100.000000
370881591610392830-34M28425911.25100.023480
470881891610392830-34M28413311.29110.024195
\n", + "
" + ], + "text/plain": [ + " ad_id xyz_campaign_id fb_campaign_id age gender interest \\\n", + "0 708746 916 103916 30-34 M 15 \n", + "1 708749 916 103917 30-34 M 16 \n", + "2 708771 916 103920 30-34 M 20 \n", + "3 708815 916 103928 30-34 M 28 \n", + "4 708818 916 103928 30-34 M 28 \n", + "\n", + " Impressions Clicks Spent Total_Conversion Approved_Conversion CTR \n", + "0 7350 1 1.43 2 1 0.013605 \n", + "1 17861 2 1.82 2 0 0.011198 \n", + "2 693 0 0.00 1 0 0.000000 \n", + "3 4259 1 1.25 1 0 0.023480 \n", + "4 4133 1 1.29 1 1 0.024195 " + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conversion_data['CTR'] = (conversion_data.Clicks\n", + " .divide(conversion_data.Impressions)\n", + " .multiply(100)\n", + " )\n", + "conversion_data.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAASAklEQVR4nO3db2xd913H8fe3tw5eS1mo4mzFyZYyUnDkiVG8LrAACluh6VBTntBaGpUqQxZp8f4IOmVYCHgQKUgr2ppWbaM5aBWT+6AMlLGK7EE8CUu0xNlGSWZaeWVb0hTqUZEtS1K7yZcHvmlc9yb3OLFz7V/eL8lKzu/Pvd8r1Z/8es7vnBuZiSSpXNe0ugBJ0sIy6CWpcAa9JBXOoJekwhn0klS4a1tdQCMrVqzINWvWtLoMSVoyDh48+MPM7GjUtyiDfs2aNYyOjra6DElaMiLi+xfq89SNJBXOoJekwhn0klQ4g16SCmfQS1LhDHqpgqGhIbq7u6nVanR3dzM0NNTqkqTKFuX2SmkxGRoaYmBggMHBQTZs2MDIyAh9fX0A9Pb2trg6qblYjI8p7unpSffRa7Ho7u5m165dbNy48Y224eFh+vv7OXToUAsrk86LiIOZ2dOwz6CXLq5Wq3H69Gna2treaJuamqK9vZ0zZ860sDLpvIsFvefopSa6uroYGRl5U9vIyAhdXV0tqkiaG4NeamJgYIC+vj6Gh4eZmppieHiYvr4+BgYGWl2aVIkXY6Umzl1w7e/vZ2xsjK6uLnbs2OGFWC0ZnqOXpAJ4jl6SrmIGvSQVzqCXpMJVCvqIuCMino+I8YjY3qD/lyLiXyPitYj407nMlSQtrKZBHxE14BFgE7AO6I2IdbOGvQp8AvjcJcyVJC2gKiv624DxzHwxMyeBJ4HNMwdk5iuZeQCYmutcSdLCqhL0ncCRGcdH621VVJ4bEVsiYjQiRicmJiq+vCSpmSpBHw3aqm6+rzw3M3dnZk9m9nR0NPwic0nSJagS9EeB1TOOVwHHKr7+5cyVJM2DKkF/AFgbETdHxDLgXmBvxde/nLmSpHnQ9Fk3mfl6RGwD9gE1YE9mHo6IrfX+xyLincAo8DPA2Yj4FLAuM3/UaO5CfRhJ0lv5rBtJKoDPupGkq5hBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJcqGBoaoru7m1qtRnd3N0NDQ60uSars2lYXIC12Q0NDDAwMMDg4yIYNGxgZGaGvrw+A3t7eFlcnNReZ2eoa3qKnpydHR0dbXYYEQHd3N7t27WLjxo1vtA0PD9Pf38+hQ4daWJl0XkQczMyehn0GvXRxtVqN06dP09bW9kbb1NQU7e3tnDlzpoWVSeddLOg9Ry810dXVxcjIyJvaRkZG6OrqalFF0twY9FITAwMD9PX1MTw8zNTUFMPDw/T19TEwMNDq0qRKvBgrNXHugmt/fz9jY2N0dXWxY8cOL8RqyfAcvSQV4LLP0UfEHRHxfESMR8T2Bv0REQ/V+5+LiFtn9H06Ig5HxKGIGIqI9kv/KJKkuWoa9BFRAx4BNgHrgN6IWDdr2CZgbf1nC/BofW4n8AmgJzO7gRpw77xVL0lqqsqK/jZgPDNfzMxJ4Elg86wxm4EnctozwPKIuKnedy3wtoi4FrgOODZPtUuSKqgS9J3AkRnHR+ttTcdk5kvA54AfAC8DxzPz65deriRprqoEfTRom30Ft+GYiPhZplf7NwM/B1wfER9t+CYRWyJiNCJGJyYmKpQlSaqiStAfBVbPOF7FW0+/XGjMh4H/ysyJzJwCvgL8eqM3yczdmdmTmT0dHR1V65ckNVEl6A8AayPi5ohYxvTF1L2zxuwF7qvvvlnP9Cmal5k+ZbM+Iq6LiAA+BIzNY/2SpCaa3jC
Vma9HxDZgH9O7ZvZk5uGI2Frvfwx4GrgTGAdOAvfX+56NiKeAbwKvA98Cdi/EB5EkNeYNU5JUAB9qJklXMYNekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6qYKhoSG6u7up1Wp0d3czNDTU6pKkyq5tdQHSYjc0NMTAwACDg4Ns2LCBkZER+vr6AOjt7W1xdVJzlVb0EXFHRDwfEeMRsb1Bf0TEQ/X+5yLi1hl9yyPiqYj4z4gYi4hfm88PIC20HTt2MDg4yMaNG2lra2Pjxo0MDg6yY8eOVpcmVRKZefEBETXgBeB24ChwAOjNzO/MGHMn0A/cCXwA+EJmfqDe9yXgXzLzixGxDLguM//vYu/Z09OTo6Ojl/6ppHlUq9U4ffo0bW1tb7RNTU3R3t7OmTNnWliZdF5EHMzMnkZ9VVb0twHjmfliZk4CTwKbZ43ZDDyR054BlkfETRHxM8BvAoMAmTnZLOSlxaarq4uRkZE3tY2MjNDV1dWiiqS5qRL0ncCRGcdH621Vxvw8MAH8bUR8KyK+GBHXN3qTiNgSEaMRMToxMVH5A0gLbWBggL6+PoaHh5mammJ4eJi+vj4GBgZaXZpUSZWLsdGgbfb5nguNuRa4FejPzGcj4gvAduDP3zI4czewG6ZP3VSoS7oizl1w7e/vZ2xsjK6uLnbs2OGFWC0ZVYL+KLB6xvEq4FjFMQkczcxn6+1PMR300pLS29trsGvJqnLq5gCwNiJurl9MvRfYO2vMXuC++u6b9cDxzHw5M/8bOBIRv1gf9yHgO0hLjPvotZQ1XdFn5usRsQ3YB9SAPZl5OCK21vsfA55mesfNOHASuH/GS/QDX67/I/HirD5p0XMfvZa6ptsrW8HtlVpMuru72bVrFxs3bnyjbXh4mP7+fg4dOtTCyqTzLra90qCXmnAfvZaCy91HL13V3Eevpc6gl5pwH72WOh9qJjXhPnotda7oJalwruilJoaGhti6dSunTp3i7NmzvPDCC2zduhVwe6WWBlf0UhPbtm3jxIkT7Ny5k5/85Cfs3LmTEydOsG3btlaXJlVi0EtNvPrqq9xzzz3s2bOHG264gT179nDPPffw6quvtro0qRKDXqpg//797Nq1i9OnT7Nr1y7279/f6pKkygx6qYKTJ09e9FhazLwYK1Vw4sQJbr/9ds6cOUOtVuPs2bOtLkmqzBW91MSqVatob2/nmmumf12uueYa2tvbWbVqVYsrk6ox6KUKli9fzr59+5icnGTfvn0sX7681SVJlRn0UhPHjh3j7rvvZtOmTSxbtoxNmzZx9913c+zY7O/fkRYnn14pNbF69WpeeeUVJicn32hbtmwZK1eu5MiRIxeZKV05Pr1SugwTExNMTk5y1113MTExwV133cXk5CR+ib2WCnfdSE289tprdHZ28tWvfpWOjg4igs7OTl566aVWlyZV4opequDYsWOsXLkSgJUrV3p+XkuKQS9VcO5aVkS86VhaCgx6qaKTJ08SEd4VqyXHoJcq6Ozs5MSJE5w9e5YTJ07Q2dnZ6pKkygx6qYKXXnrpTXfGeiFWS4lBLzVx/fXXA3DmzJk3/XmuXVrsDHqpiVOnThERvOMd73jTn6dOnWp1aVIlBr3UxNmzZ3nggQdYsWIFEcGKFSt44IEHfIKllgwfgaCr1rmtklfCYvw9U1l8BILUQGZW+rnxxhup1Wo8+OCDrP70Uzz44IPUajVuvPHGyq8htZKPQJCaePjhh/nYxz7G9u3bmZqaYntbG9dddx0PP/xwq0uTKqm0oo+IOyLi+YgYj4jtDfojIh6q9z8XEbfO6q9FxLci4p/mq3DpSunt7eXxxx/nlltugbiGW265hccff5ze3t5WlyZV0jT
oI6IGPAJsAtYBvRGxbtawTcDa+s8W4NFZ/Z8Exi67WqlFent7OXToEO/+zF4OHTpkyGtJqbKivw0Yz8wXM3MSeBLYPGvMZuCJnPYMsDwibgKIiFXAR4AvzmPdkqSKqgR9JzDz2xWO1tuqjvk88BngonvRImJLRIxGxKjP+Zak+VMl6BvtQZu9jaDhmIj4PeCVzDzY7E0yc3dm9mRmT0dHR4WyJElVVAn6o8DqGcergNkP477QmA8Cd0XE95g+5fPbEfF3l1ytJGnOqgT9AWBtRNwcEcuAe4G9s8bsBe6r775ZDxzPzJcz87OZuSoz19Tn7c/Mj87nB5AkXVzTffSZ+XpEbAP2ATVgT2Yejoit9f7HgKeBO4Fx4CRw/8KVLEmai0o3TGXm00yH+cy2x2b8PYGPN3mNbwDfmHOFkqTL4iMQJKlwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalw17a6AGm+/PJffZ3jp6YW/H3WbP/agr7+29/Wxr//xe8s6Hvo6mLQqxjHT03xvZ0faXUZl22h/yHR1cdTN5JUuEpBHxF3RMTzETEeEdsb9EdEPFTvfy4ibq23r46I4YgYi4jDEfHJ+f4AkqSLaxr0EVEDHgE2AeuA3ohYN2vYJmBt/WcL8Gi9/XXgTzKzC1gPfLzBXEnSAqqyor8NGM/MFzNzEngS2DxrzGbgiZz2DLA8Im7KzJcz85sAmfljYAzonMf6JUlNVAn6TuDIjOOjvDWsm46JiDXArwDPzrVISdKlqxL00aAt5zImIn4a+HvgU5n5o4ZvErElIkYjYnRiYqJCWZKkKqoE/VFg9YzjVcCxqmMioo3pkP9yZn7lQm+Smbszsyczezo6OqrULkmqoErQHwDWRsTNEbEMuBfYO2vMXuC++u6b9cDxzHw5IgIYBMYy82/mtXJJUiVNb5jKzNcjYhuwD6gBezLzcERsrfc/BjwN3AmMAyeB++vTPwj8IfAfEfHtetufZebT8/sxJEkXUunO2HowPz2r7bEZf0/g4w3mjdD4/L0k6QrxzlhJKpxBL0mF86FmKsYNXdt575fe8oSOJeeGLoCl/3A2LR4GvYrx47GdPr1SasBTN5JUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDjvjFVRSrir9O1va2t1CSqMQa9iXInHH6zZ/rUiHrOgq4unbiSpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4SoFfUTcERHPR8R4RGxv0B8R8VC9/7mIuLXqXEnSwmoa9BFRAx4BNgHrgN6IWDdr2CZgbf1nC/DoHOZKkhZQlRX9bcB4Zr6YmZPAk8DmWWM2A0/ktGeA5RFxU8W5kqQFVOUbpjqBIzOOjwIfqDCms+JcACJiC9P/N8C73vWuCmVJlyciLm3eX899TmZe0ntJ86HKir7Rb8Ps/2ovNKbK3OnGzN2Z2ZOZPR0dHRXKki5PZl6xH6mVqqzojwKrZxyvAo5VHLOswlxJ0gKqsqI/AKyNiJsjYhlwL7B31pi9wH313TfrgeOZ+XLFuZKkBdR0RZ+Zr0fENmAfUAP2ZObhiNha738MeBq4ExgHTgL3X2zugnwSSVJDsRjPH/b09OTo6Giry5CkJSMiDmZmT6M+74yVpMIZ9JJUOINekgpn0EtS4RblxdiImAC+3+o6pAZWAD9sdRFSA+/OzIZ3my7KoJcWq4gYvdDOBmmx8tSNJBXOoJekwhn00tzsbnUB0lx5jl6SCueKXpIKZ9BLUuGqPI9eumpExDuBzwPvB14D/ofpb0V7AXgXcLz+80Pgj4Ax4Hmmv3thFOjLzKkrX7l0Ya7
opbqY/m7BfwC+kZnvycx1wKeB383M9zH9XQoPZOb7MvPD9Wnfrfe9l+kv1vmDVtQuXYwreum8jcBU/TsWAMjMb1eZmJlnIuLfmP6eZGlRcUUvndcNHLyUiRHRzvQpnn+e14qkeWDQS5fnPRHxbeB/gR9k5nOtLkiazaCXzjsM/Ooc55w7R/8LwPqIuGv+y5Iuj0Evnbcf+KmI+ONzDRHx/oj4rWYTM/NlYDvw2QWsT7okBr1Ul9O3if8+cHtEfDciDgN/CRyr+BL/CFwXEb+xQCVKl8RHIEhS4VzRS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUuP8HeyI1WUzTIMMAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "conversion_data.CTR.plot.box()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bonus: Conversion Rate (CVR)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ad_idxyz_campaign_idfb_campaign_idagegenderinterestImpressionsClicksSpentTotal_ConversionApproved_ConversionCTRCVR
070874691610391630-34M15735011.43210.0136050.027211
170874991610391730-34M161786121.82200.0111980.011198
270877191610392030-34M2069300.00100.0000000.144300
370881591610392830-34M28425911.25100.0234800.023480
470881891610392830-34M28413311.29110.0241950.024195
\n", + "
" + ], + "text/plain": [ + " ad_id xyz_campaign_id fb_campaign_id age gender interest \\\n", + "0 708746 916 103916 30-34 M 15 \n", + "1 708749 916 103917 30-34 M 16 \n", + "2 708771 916 103920 30-34 M 20 \n", + "3 708815 916 103928 30-34 M 28 \n", + "4 708818 916 103928 30-34 M 28 \n", + "\n", + " Impressions Clicks Spent Total_Conversion Approved_Conversion \\\n", + "0 7350 1 1.43 2 1 \n", + "1 17861 2 1.82 2 0 \n", + "2 693 0 0.00 1 0 \n", + "3 4259 1 1.25 1 0 \n", + "4 4133 1 1.29 1 1 \n", + "\n", + " CTR CVR \n", + "0 0.013605 0.027211 \n", + "1 0.011198 0.011198 \n", + "2 0.000000 0.144300 \n", + "3 0.023480 0.023480 \n", + "4 0.024195 0.024195 " + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conversion_data['CVR'] = (conversion_data.Total_Conversion\n", + " .divide(conversion_data.Impressions)\n", + " .multiply(100)\n", + " )\n", + "conversion_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "xyz_campaign_id\n", + "916 54\n", + "936 464\n", + "1178 625\n", + "dtype: int64" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conversion_data.groupby(['xyz_campaign_id']).size()" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "conversion_data.boxplot(column=['CVR'],by='xyz_campaign_id',figsize=(8,8))" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([,\n", + " ],\n", + " dtype=object)" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "conversion_data.boxplot(column=['CTR','CVR'],by='xyz_campaign_id',figsize=(8,8))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a new column that represents Cost Per Mille (CPM) .\n", + "This number is the cost of one thousand impressions. If your objective is ad exposure to increase brand awareness, this might be an important KPI for you to measure." + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ad_idxyz_campaign_idfb_campaign_idagegenderinterestImpressionsClicksSpentTotal_ConversionApproved_ConversionCTRCVRCPM
070874691610391630-34M15735011.43210.0136050.0272110.194558
170874991610391730-34M161786121.82200.0111980.0111980.101898
270877191610392030-34M2069300.00100.0000000.1443000.000000
370881591610392830-34M28425911.25100.0234800.0234800.293496
470881891610392830-34M28413311.29110.0241950.0241950.312122
\n", + "
" + ], + "text/plain": [ + " ad_id xyz_campaign_id fb_campaign_id age gender interest \\\n", + "0 708746 916 103916 30-34 M 15 \n", + "1 708749 916 103917 30-34 M 16 \n", + "2 708771 916 103920 30-34 M 20 \n", + "3 708815 916 103928 30-34 M 28 \n", + "4 708818 916 103928 30-34 M 28 \n", + "\n", + " Impressions Clicks Spent Total_Conversion Approved_Conversion \\\n", + "0 7350 1 1.43 2 1 \n", + "1 17861 2 1.82 2 0 \n", + "2 693 0 0.00 1 0 \n", + "3 4259 1 1.25 1 0 \n", + "4 4133 1 1.29 1 1 \n", + "\n", + " CTR CVR CPM \n", + "0 0.013605 0.027211 0.194558 \n", + "1 0.011198 0.011198 0.101898 \n", + "2 0.000000 0.144300 0.000000 \n", + "3 0.023480 0.023480 0.293496 \n", + "4 0.024195 0.024195 0.312122 " + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conversion_data['CPM'] = (conversion_data.Spent\n", + " .divide(conversion_data.Impressions)\n", + " .multiply(1000)\n", + " )\n", + "conversion_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bonus: Cost per acquisition (CPA)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ad_idxyz_campaign_idfb_campaign_idagegenderinterestImpressionsClicksSpentTotal_ConversionApproved_ConversionCTRCVRCPMCPA
070874691610391630-34M15735011.43210.0136050.0272110.1945581.43
170874991610391730-34M161786121.82200.0111980.0111980.101898inf
270877191610392030-34M2069300.00100.0000000.1443000.000000NaN
370881591610392830-34M28425911.25100.0234800.0234800.293496inf
470881891610392830-34M28413311.29110.0241950.0241950.3121221.29
\n", + "
" + ], + "text/plain": [ + " ad_id xyz_campaign_id fb_campaign_id age gender interest \\\n", + "0 708746 916 103916 30-34 M 15 \n", + "1 708749 916 103917 30-34 M 16 \n", + "2 708771 916 103920 30-34 M 20 \n", + "3 708815 916 103928 30-34 M 28 \n", + "4 708818 916 103928 30-34 M 28 \n", + "\n", + " Impressions Clicks Spent Total_Conversion Approved_Conversion \\\n", + "0 7350 1 1.43 2 1 \n", + "1 17861 2 1.82 2 0 \n", + "2 693 0 0.00 1 0 \n", + "3 4259 1 1.25 1 0 \n", + "4 4133 1 1.29 1 1 \n", + "\n", + " CTR CVR CPM CPA \n", + "0 0.013605 0.027211 0.194558 1.43 \n", + "1 0.011198 0.011198 0.101898 inf \n", + "2 0.000000 0.144300 0.000000 NaN \n", + "3 0.023480 0.023480 0.293496 inf \n", + "4 0.024195 0.024195 0.312122 1.29 " + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conversion_data['CPA'] = (conversion_data.Spent\n", + " .divide(conversion_data.Approved_Conversion)\n", + " )\n", + "conversion_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 1007.000000\n", + "mean inf\n", + "std NaN\n", + "min 0.000000\n", + "25% 15.956250\n", + "50% 83.437499\n", + "75% inf\n", + "max inf\n", + "Name: CPA, dtype: float64" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conversion_data.CPA.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xyz_campaign_idfb_campaign_idSpentApproved_Conversion
09161039161.4300001
19161039171.8200000
29161039200.0000000
39161039282.5400001
49161039290.0000001
...............
6861178179977358.1899972
6871178179978173.8800030
688117817997940.2899990
6891178179981198.7100002
6901178179982165.6099992
\n", + "

691 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " xyz_campaign_id fb_campaign_id Spent Approved_Conversion\n", + "0 916 103916 1.430000 1\n", + "1 916 103917 1.820000 0\n", + "2 916 103920 0.000000 0\n", + "3 916 103928 2.540000 1\n", + "4 916 103929 0.000000 1\n", + ".. ... ... ... ...\n", + "686 1178 179977 358.189997 2\n", + "687 1178 179978 173.880003 0\n", + "688 1178 179979 40.289999 0\n", + "689 1178 179981 198.710000 2\n", + "690 1178 179982 165.609999 2\n", + "\n", + "[691 rows x 4 columns]" + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conversion_data.groupby([ 'xyz_campaign_id','fb_campaign_id'],as_index=False).agg({'Spent':'sum',\n", + " 'Approved_Conversion':'sum'})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "toc-autonumbering": true + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/startup_data_analysis/notebook/Startup_data_analysis-MK.ipynb b/startup_data_analysis/notebook/Startup_data_analysis-MK.ipynb new file mode 100644 index 0000000..0073dfb --- /dev/null +++ b/startup_data_analysis/notebook/Startup_data_analysis-MK.ipynb @@ -0,0 +1,15720 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Indian Startup Data Exploration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "India is one of the fastest growing economy in the world. 
There are a lot of innovative startups coming up in the region and a lot of funding for these startups as well.\n", + "\n", + "* Want to know what types of startups have been getting funded in the last few years?\n", + "\n", + "* Want to know who the important investors are? \n", + "* Want to know which hot fields get a lot of funding these days?\n", + "\n", + "Well, investors as well as startup founders have these questions in mind too. \n", + "\n", + "There are two main scenarios:\n", + "\n", + "* Investors are forming a partnership with the startups they choose to invest in – if the company turns a profit, investors make returns proportionate to their amount of equity in the startup; if the startup fails, the investors lose the money they’ve invested. So they want to know which startups to invest in.\n", + "\n", + "* Start-up companies often look to angel investors or other investors to raise much-needed capital to get their business off the ground - but how does one value a brand new company?\n", + "\n", + "## Source [kaggle](https://www.kaggle.com/sudalairajkumar/indian-startup-funding?select=startup_funding.csv) scraped from [trak.in](https://trak.in/india-startup-funding-investment-2015/)\n", + "\n", + "This dataset is a chance to explore the Indian startup scene. Dive deep into the funding data, derive insights to answer the above questions, and also peek into the future of the market. \n", + "\n", + "We have been provided with data containing features like \n", + " - date\n", + " - industry verticals\n", + " - startup location\n", + " - investment type\n", + " - amount of investment\n", + " - investor names \n", + " and so on.\n", + "\n", + "Let's start exploring and analyzing the data then!"
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import os\n", + "import string\n", + "import datetime\n", + "import numpy as np \n", + "import pandas as pd \n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import plotly.offline as py\n", + "py.init_notebook_mode(connected=True)\n", + "import plotly.graph_objs as go\n", + "import plotly_express as px" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SNoDateStartupNameIndustryVerticalSubVerticalCityInvestorsNameInvestmentTypeAmountInUSDRemarksyearyearmonthCleanedAmount
0109/01/2020BYJU’SE-TechE-learningBangaloreTiger Global ManagementPrivate Equity Round20,00,00,000NaN20202020-01-01200000000.0
1213/01/2020ShuttlTransportationApp based shuttle serviceNCRSusquehanna Growth EquitySeries C80,48,394NaN20202020-01-018048394.0
2309/01/2020MamaearthE-commerceRetailer of baby and toddler productsBangaloreSequoia Capital IndiaSeries B1,83,58,860NaN20202020-01-0118358860.0
3402/01/2020https://www.wealthbucket.in/FinTechOnline InvestmentNCRVinod KhatumalPreseries A30,00,000NaN20202020-01-013000000.0
4502/01/2020FashorFashion and ApparelEmbroiled Clothes For WomenMumbaiSprout Venture PartnersSeed Funding18,00,000NaN20202020-01-011800000.0
\n", + "
" + ], + "text/plain": [ + " SNo Date StartupName IndustryVertical \\\n", + "0 1 09/01/2020 BYJU’S E-Tech \n", + "1 2 13/01/2020 Shuttl Transportation \n", + "2 3 09/01/2020 Mamaearth E-commerce \n", + "3 4 02/01/2020 https://www.wealthbucket.in/ FinTech \n", + "4 5 02/01/2020 Fashor Fashion and Apparel \n", + "\n", + " SubVertical City \\\n", + "0 E-learning Bangalore \n", + "1 App based shuttle service NCR \n", + "2 Retailer of baby and toddler products Bangalore \n", + "3 Online Investment NCR \n", + "4 Embroiled Clothes For Women Mumbai \n", + "\n", + " InvestorsName InvestmentType AmountInUSD Remarks \\\n", + "0 Tiger Global Management Private Equity Round 20,00,00,000 NaN \n", + "1 Susquehanna Growth Equity Series C 80,48,394 NaN \n", + "2 Sequoia Capital India Series B 1,83,58,860 NaN \n", + "3 Vinod Khatumal Preseries A 30,00,000 NaN \n", + "4 Sprout Venture Partners Seed Funding 18,00,000 NaN \n", + "\n", + " year yearmonth CleanedAmount \n", + "0 2020 2020-01-01 200000000.0 \n", + "1 2020 2020-01-01 8048394.0 \n", + "2 2020 2020-01-01 18358860.0 \n", + "3 2020 2020-01-01 3000000.0 \n", + "4 2020 2020-01-01 1800000.0 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('../data/startup_data.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task1: Number Of Fundings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Can we get an overview of the number of fundings that has changed over time?" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# lets begin by plotting using matplotlib and pandas\n", + "df.year.value_counts().sort_index(ascending=True).plot.bar(rot=0)\n", + "plt.title('Number of funding deals over the years')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## In this notebook we will try to use plotly as much as we can" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2015 936\n", + "2016 993\n", + "2017 687\n", + "2018 310\n", + "2019 111\n", + "2020 7\n", + "Name: year, dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "num_of_funding_rounds = df.year.value_counts().sort_index(ascending=True)\n", + "num_of_funding_rounds" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "type": "bar", + "x": [ + 2015, + 2016, + 2017, + 2018, + 2019, + 2020 + ], + "y": [ + 936, + 993, + 687, + 310, + 111, + 7 + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + 
"type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + 
"outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + 
"type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 
0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + 
"shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Number of funding deals by year" + }, + "xaxis": { + "autorange": true, + "range": [ + 2014.5, + 2020.5 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + 0, + 1045.2631578947369 + ], + "type": "linear" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(x=num_of_funding_rounds.index,\n", + " y=num_of_funding_rounds.values),\n", + " layout_title_text='Number of funding deals by year')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "\n", + "\n", + "* Years 2015 & 2016 has got more number of fundings compared to the recent years\n", + "\n", + "* We can see a clear decling trend in the number of funding deals from 2016. \n", + "\n", + "Not sure of the exact reason. One thing could be that not all the funding deals are captured in the recent days.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df.yearmonth.value_counts().sort_index().plot(figsize=(12,5))\n", + "plt.title('Number of funding deals - month on month')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "We can see a steady decline here as well but seems to be increasing in the last few months.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lets try to see if the decrease in deals has any impact on amount being invested?" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.ticker as ticker\n", + "ax1 = df.yearmonth.value_counts().sort_index().plot( figsize=(15,8), color='tab:red', rot=90)\n", + "ax1.tick_params(axis='y', labelcolor='tab:red')\n", + "ax1.set_ylabel('no. of funding deals', color='tab:red')\n", + "\n", + "#https://matplotlib.org/gallery/api/two_scales.html\n", + "ax2 = ax1.twinx()\n", + "\n", + "# https://matplotlib.org/3.1.0/gallery/ticks_and_spines/custom_ticker1.html\n", + "def billions(x, pos):\n", + " 'The two args are the value and tick position'\n", + " return '$%1.1fB' % (x * 1e-9)\n", + "formatter_billions = ticker.FuncFormatter(billions)\n", + "\n", + "#https://stackoverflow.com/a/38152510/8210613\n", + "formatter = ticker.StrMethodFormatter('${x:,.0f}')\n", + "\n", + "ax2.yaxis.set_major_formatter(formatter_billions)\n", + "\n", + "df.groupby('yearmonth').CleanedAmount.sum().plot.bar(ax=ax2)\n", + "ax2.set_ylabel('total amount invested (in billions)')\n", + "plt.title('Funding deals against the amount invested')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight\n", + "Inspite of less number of deals, in the recent past, the total investments have seen exponential growth?? or is it??" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task2: Funding Values\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Can we get an overview of the funding values investors usually invest?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# lets convert the amount from string to numeric\n", + "df['CleanedAmount'] = pd.to_numeric(df.AmountInUSD.str.replace(',',''),errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de5wcVZ338c83N1AJEpLsEhJy88mqwC5CRgjiKos3wsJmFVcjKpcF86DIyiPogiiK7sV1XZ4VUbIBWUGBeAWDJmoUEKI7gSSESwQkBhJiIhlCyAWQZJjf/lFnsNPp6amZ6eqemf6+X69+TXXVqVO/rnT6V3VOVR1FBGZm1ryGNDoAMzNrLCcCM7Mm50RgZtbknAjMzJqcE4GZWZNzIjAza3JOBLYbSXMlfapGdU2UtEPS0PT+dkln1aLuVN8iSafVqr4ebPefJD0p6fddLP+gpCfSZx9d421/RtI30/Ru+7fG26nZ98D6PyeCJiLpMUnPSdou6WlJv5J0tqQXvwcRcXZEfC5nXW+uViYi1kXEPhHxQg1if/EHsKT+mRFxbV/r7mEcBwHnAwdHxAEVlg8HLgPemj775qJiqeX+rVB3ru9BJZW+G5JOl7Sk5P2Zkh5K38UnJP1I0si07OuSdqZl2yU9IOlfJb28b5/KuuJE0HxOioiRwCTg88A/Al+r9UYkDat1nf3EJGBzRGzqYvmfAnsDq+oX0sAi6Y3AvwDvSd/FVwPfLiv2hbRsLHAGMAP4paSX1TXYJuFE0KQiYmtELADeDZwm6VB48Wjsn9L0GEk/TGcPT0m6U9IQSd8AJgK3pKaJj0uaLCnSkd464NaSeaVJ4RWS7pK0VdIPJO2ftnWspPWlMXYeWUo6HvgE8O60vXvT8hebmlJcn5S0VtImSdd1HkGWxHGapHWpWefirvaNpJen9dtSfZ9M9b8ZWAwcmOL4etl6fwY8nN4+LaniPiiL+3RJSyR9UdIWSY9KmllSdoqkX6Qj48XAmJJlu9Wd6v2cpF+m8j+VVFr+1PR5Nkv6VLWzurLvwbGS1ks6P+3bjZLO6Gr/5fBa4H8i4h6AiHgqIq6NiO3lBSPiDxFxN/A3wGiypGA15kTQ5CLiLmA98JcVFp+flo0lO9L9RLZKvB9YR3Z2sU9EfKFknTeSHeG9rYtNngr8PXAg0A5cniPGH5MdQX4rbe+wCsVOT6+/AqYC+wBXlJV5PfBK4E3AJZJe3cUmvwy8PNXzxhTzGRHxM2AmsCHFcXpZnL8BDklv94uI47r7bMlRZAlkDPAF4GuSlJbdACxPyz4HdNcncgrZj+WfACOACwAkHQx8FXgvMC59vvE54wM4oGSdM4GvSBrVg/VLLQXeJulSScdI2qu7FVKSWEzl76n10YBMBJKuSUcmD+QoO0nSzyXdl46YJtQjxgFmA7B/hfm7yH40JkXEroi4M7p/ONVnIuKZiHiui+XfiIgHIuIZ4FPAu1Sbzs73ApdFxJqI2AFcBMwuOxu5NCKei4h7gXuBPRJKiuXdwEURsT0iHgP+A3h/DWLsytqIuCq19V9Lts//VNJEsqPnT0XE8xFxB3BLN3X9d0T8Ju3/bwOvSf
PfCdwSEUsiYidwCdCTB43tAj6bvgcLgR1kSbXHIuJO4B3AEcCPgM2SLsvxPejqe2p9NCATAfB14PicZb8IXBcRfwF8FvjXooIawMYDT1WY/+/AauCnktZIujBHXY/3YPlaYDglzR19cGCqr7TuYWRnMp1Kr/J5luysodwYsiPp8rp6cvTcUy/GFRHPpsl9yD7TlpQ0S2PJVRe7f8YDKdn3aTs96cjeHBHtXdRdrp3s37XUcLJk0rn9RRFxEtkP+yyys7nurijr6ntqfTQgE0E6MtrtCyHpFZJ+LGl5ast+VVp0MPDzNH0b2ZfOEkmvJfsPtqR8WToiPj8ipgInAR+V9KbOxV1U2d1R5kEl0xPJfhyeBJ4BXloS11CyJqm89W4g68gtrbsdeKKb9co9mWIqr+t3PaynU+eP+EtL5u1xtVEXNgKjtHsH6cRexrERePFsWNJLyNrci7AOmFw2bwoVklhEdETEz4FbgUO7qlDSPsCbgTtrF6Z1GpCJoAvzgHMjYjpZu+hX0/x7gZPT9NuBkarxtd0DkaR9JZ0IzAe+GRH3VyhzoqT/k9qrtwEvpBdkP7BTe7Hp90k6WNJLyc7QvpuaRH4D7C3pr5VdgvlJoLTt+AlgskoudS1zI/D/UufqPvyxT6G9i/IVpVi+DfyzpJGSJgEfBb5Zfc0u62sjSyLvkzRU0t8Dr8i57lpgGXCppBGSXk+WkHvju8BJkl4naQRwKaBu1umtbwHnSXqVMi1k/ULzASTNkjRb0qi0/EiyvpjW8ook7SVpOnAzsAX474JibmqDIhGk//ivA74jaSXwX2TtrJAlhTdKuofsy/Y7siPFZnWLpO1kzQQXk13z3tWVGNOAn5G1B/8P8NWIuD0t+1fgk8quKLqgB9v/BlnT3u/JLrP8B8iuYgI+BFxN9m/0DFlHdafvpL+bJa2oUO81qe47gEeBPwDn9iCuUuem7a8hO1O6IdXfWx8APkbWFHMI8KserHsKWWfyU8Cnget6E0BErCL7XPPJzg62A5uA53tTXzeuIvvBvgXYShbzxanTH7If9A8Aj5AdYHwT+PeIuL6kjo+n7+lTaf3lwOvKmsmsRjRQB6aRNBn4YUQcKmlf4OGIGNfNOvsAD0WEO4ytqaX/C08D0yLi0UbHY401KM4IImIb8KikvwNIp5uHpekxJc0JF9G3IzuzAUvSSZJemvocvgjcDzzW2KisPxiQiUDSjWRNFa9MN7qcSXb54JnKbjZaxR87hY8FHpb0G7IrSP65ASGb9QezyDrVN5A1+83OcTmwNYEB2zRkZma1MSDPCMzMrHYG3IPBxowZE5MnT250GGZmA8ry5cufjIixlZYNuEQwefJkli1b1ugwzMwGFEld3pVeeNNQuonmHkk/rLBMki6XtDo9C+iIouMxM7Pd1aOP4CPAg10sm0l29cI0YA5wZR3iMTOzEoUmgvSkz78mu1u0kllkD4SLiGgF9pNU9aYwMzOrraLPCP4T+DjQ0cXy8ez+NMr1VHjKo6Q5kpZJWtbW1lb7KM3MmlhhiSA90GxTRCyvVqzCvD1ubIiIeRHREhEtY8dW7PQ2M7NeKvKM4BjgbyQ9Rvagq+NUNvg42RlA6WOJJ5Dd9WhmZnVSWCKIiIsiYkJETAZmA7dGxPvKii0ATk1XD80AtkbExqJiMjMbqJav3cJXblvN8rVbal533e8jkHQ2QETMBRYCJ5CNgvUsHpjazGwPy9du4b1Xt7KzvYMRw4Zw/VkzmD6pt0NG76kuiSA9w/72ND23ZH4A59QjBjOzgap1zWZ2tnfQEbCrvYPWNZtrmgj8rCEzs35uxtTRjBg2hKGC4cOGMGNqbQdZHHCPmDAzazbTJ43i+rNm0LpmMzOmjq7p2QA4EZiZDQjTJ42qeQLo5KYhM7Mm50RgZtbknAjMzJqcE4GZWZNzIjAza3JOBGZmTc6JwMysyTkRmJk1OScCM7Mm50RgZtbknAjMzJqcE4GZWZNzIjAza3JFDl
6/t6S7JN0raZWkSyuUOVbSVkkr0+uSouIxM7PKinwM9fPAcRGxQ9JwYImkRRHRWlbuzog4scA4zMysisISQRqGckd6Ozy9oqjtmZlZ7xTaRyBpqKSVwCZgcUQsrVDs6NR8tEjSIUXGY2Zmeyo0EUTECxHxGmACcKSkQ8uKrAAmRcRhwJeBmyvVI2mOpGWSlrW1tRUZsplZ06nLVUMR8TRwO3B82fxtEbEjTS8EhksaU2H9eRHREhEtY8eOrUfIZmZNo8irhsZK2i9NvwR4M/BQWZkDJClNH5ni2VxUTGZmtqcirxoaB1wraSjZD/y3I+KHks4GiIi5wDuBD0pqB54DZqdOZjMzq5Mirxq6Dzi8wvy5JdNXAFcUFYOZmXXPdxabmTU5JwIzsybnRGBm1uScCMzMmpwTgZlZk3MiMDNrck4EZmZNzonAzKzJORGYmTU5JwIzsybnRGBm1uScCMzMmpwTgZlZk3MiMDNrck4EZmZNzonAzKzJVR2YRtIrgTnAq9KsB4GrIuLhogMzM7P66PKMQNLRZAPObwfmAVcBzwC3SZrRXcWS9pZ0l6R7Ja2SdGmFMpJ0uaTVku6TdESvP4mZmfVKtTOCS4D3RMTtJfNulnQr8GlgZjd1Pw8cFxE7JA0HlkhaFBGtJWVmAtPS6yjgyvTXzMzqpFofwSvKkgAAEfELYGp3FUdmR3o7PL3KB6afBVyXyrYC+0kalytyMzOriWqJYHuVZc/kqVzSUEkrgU3A4ohYWlZkPPB4yfv1aV55PXMkLZO0rK2tLc+mzcwsp2pNQwdJurzCfFHhx7qSiHgBeI2k/YCbJB0aEQ+U1bXHahXqmUfWT0FLS8sey83MrPeqJYKPVVm2rCcbiYinJd0OHA+UJoL1wEEl7ycAG3pSt5mZ9U2XiSAiri2fJ2kU8HREdHtULmkssCslgZcAbwb+razYAuDDkuaTdRJvjYiNPfkAZmbWN9UuH71E0qvS9F7paqHfAk9IenOOuseRXWp6H3A3WR/BDyWdLensVGYhsAZYTXZ56of68FnMzKwXqjUNvRv4XJo+jaw9fyzwZ8C1wM+qVRwR9wGHV5g/t2Q6gHN6FrKZmdVStauGdpY0Ab0NmB8RL0TEg3RzR7KZmQ0c1RLB85IOTW39fwX8tGTZS4sNy8zM6qXakf15wHfJmoP+f0Q8CiDpBOCeOsRmZmZ1UO2qoVb++LC50vkLyTp5zcxsEOgyEUj6aNmsAJ4ElnSeHZiZ2cBXrY9gZNlrX6AFWCRpdh1iMzOzOqjWNLTHY6MBJO1Pduno/KKCMjOz+unxCGUR8RSVnxFkZmYDUI8TgaTjgC0FxGJmZg1QrbP4fvZ8Euj+ZA+FO7XIoMzMrH6q3UdwYtn7ADZHRK6xCMzMbGCo1lm8tp6BmJlZY/S4j8DMzAYXJwIzsybnRGBm1uS6TQSS3iHpEUlbJW2TtF3StnoEZ2ZmxctzRvAF4G8i4uURsW9EjIyIfbtbSdJBkm6T9KCkVZI+UqHMsSnBrEyvS3rzIczMrPfyDDDzRBqMpqfagfMjYoWkkcBySYsj4tdl5e6MiPJLVc3MrMTytVtoXbOZGVNHM33SqJrWnScRLJP0LeBm4PnOmRHx/WorpUHoN6bp7ZIeBMYD5YnAzMyqWL52C++9upWd7R2MGDaE68+aUdNkkKdpaF/gWeCtwEnp1aMjeEmTycYvXlph8dGS7pW0SNIhXaw/R9IyScva2tp6smkzswGvdc1mdrZ30BGwq72D1jWba1p/t2cEEXFGXzYgaR/ge8B5EVHeybwCmBQRO9LIZzcD0yrEMA+YB9DS0lL+2Aszs0FtxtTRjBg2hF3tHQwfNoQZU0fXtP5qzxr6eER8QdKX2fOZQ0TEP3RXuaThZEng+kpNSaWJISIWSvqqpDER8WTuT2BmNshNnzSK68+a0ZA+gs4O4mW9qViSgK8BD0bEZV2UOYCsMzokHUnWVFXbcx4zs0Fg+qRRNU8Anao9a+
iW9PfaXtZ9DPB+4H5JK9O8TwATU71zgXcCH5TUDjwHzI4IN/2YmdVRnquGeiUiltDNADYRcQVwRVExmJlZ9/yICTOzJpfnERPH5JlnZmYDU54zgi/nnGdmZgNQtctHjwZeB4yV9NGSRfsCQ4sOzMzM6qNaZ/EIYJ9UZmTJ/G1kV/uYmdkgUO3y0V8Av5D0dQ9baWY2eOW5fHQvSfOAyaXlI+K4ooIyM7P6yZMIvgPMBa4GXig2HDMzq7c8iaA9Iq4sPBIzM2uIPJeP3iLpQ5LGSdq/81V4ZGZmVhd5zghOS38/VjIvgKm1D8fMzOotz3gEU+oRiJmZNUa3iUDSqZXmR8R1tQ/HzMzqLU/T0GtLpvcG3kQ2spgTgZnZIJCnaejc0veSXg58o7CIzMysrnrzGOpnqTCusJmZDUx5+ghu4Y9jFg8FXg18O8d6B5E1Hx0AdADzIuJLZWUEfAk4gSzBnB4RK3ryAczMrG/y9BF8sWS6HVgbEetzrNcOnB8RKySNBJZLWhwRvy4pM5Ps7GIacBRwZfprZmZ10m3TUHr43ENkTyAdBezMU3FEbOw8uo+I7cCDwPiyYrOA6yLTCuwnaVwP4jczsz7KM0LZu4C7gL8D3gUsldSjx1BLmgwcDiwtWzQeeLzk/Xr2TBZmZlagPE1DFwOvjYhNAJLGAj8DvptnA5L2Ab4HnBcR28oXV1glymdImgPMAZg4cWKezZqZWU55rhoa0pkEks0510PScLIkcH1EfL9CkfXAQSXvJwAbygtFxLyIaImIlrFjx+bZtJmZ5ZTnB/3Hkn4i6XRJpwM/AhZ1t1K6IuhrwIMRcVkXxRYApyozA9gaERtzxm5mZjWQ54ayj0k6GTiGrClnXkTclKPuY4D3A/dLWpnmfQKYmOqdCywku3R0Ndnlo2f0+BOYmVmf5OkjICK+J2lxZ3lJ+0fEU92ss4TKfQClZQI4J2esZmZWgDw3lP1f4LPAc2Q3hgk/htrMbNDIc0ZwAXBIRDxZdDBmZlZ/eTqLf0vWfm9mZoNQnjOCi4BfSVoKPN85MyL+obCozMysbvIkgv8CbgXuJ+sjMDOzQSRPImiPiI8WHomZmTVEnj6C2yTNkTRO0v6dr8IjMzOzushzRnBK+ntRyTxfPmpmNkjkubN4Sj0CMTOzxsh1Z7GkQ4GDyQavByAiPHi9mdkgkOfO4k8Dx5IlgoVko4otIRuG0szMBrg8ncXvBN4E/D4izgAOA/YqNCozM6ubPInguYjoANol7Qtswh3FZmaDRp4+gmWS9gOuApYDO8iGrjQzs0Egz1VDH0qTcyX9GNg3Iu4rNiwzM6uXXFcNdYqIxwqKw8zMGiTX2MNmZjZ4FZYIJF0jaZOkB7pYfqykrZJWptclRcViZmZdy3MfQaXnCm2PiF3drPp14Aqq329wZ0Sc2F0MZmZWnDxnBCuANuA3wCNp+lFJKyRN72qliLgDqDqusZmZNV6eRPBj4ISIGBMRo8nuLP428CHgq33c/tGS7pW0SNIhXRVKTz9dJmlZW1tbHzdpZmal8iSCloj4SeebiPgp8IaIaKVvdxivACZFxGHAl4GbuyoYEfMioiUiWsaOHduHTZqZWbk8ieApSf8oaVJ6fRzYImkofRixLCK2RcSONL0QGC5pTG/rMzOz3smTCE4BJpAdsf8AmJjmDQXe1dsNSzpAktL0kSmWzb2tz8xssLph6Tre/7Wl3LB0XSH157mz+Eng3C4Wr+5qPUk3kj21dIyk9cCngeGpzrlkD7P7oKR24DlgdkREj6I3Mxvkbli6jk/cdD8Adz7yJACnHDWxptvIc/nonwEXAJNLy0fEcdXWi4j3dLP8CrLLS83MrAuLHti4x/u6JwLgO8Bc4GrghZpu3czMqpp56LgXzwQ639dankTQHhFX1nzLZmbWrc6j/0UPbGTmoeNqfjYA+RLBLZI+BNwEPN85MyJ8s5iZWR2cctTEQhJApzyJ4LT092Ml8wIPTmNmNijkuW
poSj0CMTOzxugyEUg6LiJulfSOSssj4vvFhWVmZvVS7YzgjcCtwEkVlgXgRGBmNgh0mQgi4tPp7xn1C8fMzOqtWtPQR6utGBGX1T4cMzOrt2pNQyPT31cCrwUWpPcnAXcUGZSZmdVPtaahSwEk/RQ4IiK2p/efIbvb2MzMBoE8Tx+dCOwseb+T7LlDZmY2COS5oewbwF2SbiK7WujtVB+H2MzMBpA8N5T9s6QfA69Ps86IiHuKDcvMzOolzxkBwEpgY2d5SRMjopgREszMrK7yjEdwLtmgMk+QPYZaZE1Ef1FsaGZmVg95zgg+ArwyIno0jKSka4ATgU0RcWiF5QK+BJwAPAucHhErerINMzPruzxXDT0ObO1F3V8Hjq+yfCYwLb3mAB7zwMysguVrt/CV21azfO2WQurPc0awBrhd0o/YfTyCqncWR8QdkiZXKTILuC6NU9wqaT9J4yJiY5V1zMyayvK1W3jv1a3sbO9gxLAhXH/WDKZPGlXTbeQ5I1gHLAZGkN1t3Pnqq/FkZxud1qd5e5A0R9IyScva2tpqsGkzs4Ghdc1mdrZ30BGwq72D1jU9aqXPJc/lo5fWfKsZVdpcFzHMA+YBtLS0VCxjZjYYzZg6mhHDhrCrvYPhw4YwY+romm8jz1VDt1HhBzoijuvjttcDB5W8nwBs6GOdZmaDyvRJo7j+rBm0rtnMjKmja94sBPn6CC4omd4bOBlor8G2FwAfljQfOArY6v4BM7M9TZ80qpAE0ClP09Dyslm/lPSL7taTdCNwLDBG0nqyexGGpzrnAgvJLh1dTXb5qMc9MDNrgDxNQ/uXvB0CTAcO6G69iHhPN8sDOKe7eszMrFh5moZKzwjagUeBM4sJx8zM6q3aCGUTI2JdREypZ0BmZlZf1e4juLlzQtL36hCLmZk1QLVEUHqd/9SiAzEzs8aolgiii2kzMxtEqnUWHyZpG9mZwUvSNOl9RMS+hUdnZmaFqzZ4/dB6BmJmZo2R56FzZmbWQP3hMdRmZtYgy9du4T1Xtb740LkbP9CYx1CbmVmDfH/Fena2dxDAzvYOvr9ifc234URgZtaPlV+yWcQlnE4EZmb92MlHTGDEUCFgxFBx8hETar4N9xGYmfVj0yeN4sY5Rxc6HoHPCMzM+rmHf7+d1jWbefj32wup32cEZmb92A1L1/GJm+4H4M5HngTglKMm1nQbPiMwM+vHFj2wser7Wig0EUg6XtLDklZLurDC8mMlbZW0Mr0uKTIeM7OBZuah46q+r4XCmoYkDQW+AryFbKD6uyUtiIhflxW9MyJOLCoOM7OBrLMZaNEDG5l56LiaNwtBsWcERwKrI2JNROwE5gOzCtyemdmgc8PSdYUmASi2s3g88HjJ+/XAURXKHS3pXmADcEFErCowJjOzAaMeHcVQ7BmBKswrvyluBTApIg4DvkzJqGi7VSTNkbRM0rK2trYah2lm1j/Vo6MYik0E64GDSt5PIDvqf1FEbIuIHWl6ITBc0pjyiiJiXkS0RETL2LFjCwzZzKz/OGTcvlXf10qRieBuYJqkKZJGALOBBaUFJB0gSWn6yBTP5gJjMjMbMEa+ZPiL0yp7X0uF9RFERLukDwM/AYYC10TEKklnp+VzgXcCH5TUDjwHzI4ID4tpZgbMmDqaEcOGvPgI6hlTRxeynULvLE7NPQvL5s0tmb4CuKLIGMzMBrTOY+MCj5F9Z7GZWT/VumYz7R1BAC90BK1rimk5dyIwM+unZkwdzbChQxAwdGhxTUNOBGZm/ZmbhszMmlfrms3seiFrGtr1gpuGzMyaztI1m1+8CzeA7c/tKmQ7TgRmZv3QDUvXcUd6rESnVRu3FbItJwIzs37oq7c9sse8Ih5BDU4EZmb90u+3/WG398OHqrCnjzoRmJn1M397xRLaO3af9ycj9ypse04EZmb9yOcXPsjK9Vv3mH/OX00rbJtOBGZm/chVSx7dY95QFTMOQScnAjOzfuI1l/6EFzr2vH
HspMMOLHS7TgRmZv3Aa/9pMU8/177H/ANG7sV/zj680G0X+vRRMzPr2nnz72HByg10VCnzlfdNLzwOJwIzszq6Yek6PnvLKv5QfllQBf/y9j9n+qRRhcfkRGBmVmPnzb+HH6zcsMcg7T3xt685sNAO4lJOBGZmOS1fu4UPXHs3Tz1bzDN/Or1h2pjC+wVKFZoIJB0PfIlsqMqrI+LzZcuVlp8APAucHhEriozJzPqPt/zH7TzS9kyjw+g3Xjp8CJ888ZC6nQl0KiwRSBoKfAV4C7AeuFvSgoj4dUmxmcC09DoKuDL9rbl6ZXIzs54YPkSc+fopXHjCqxsWQ5FnBEcCqyNiDYCk+cAsoDQRzAKuSwPWt0raT9K4iNhYy0CWr93CyVf+qpZVmpn1moC/nDaG684s5Li3x4pMBOOBx0ver2fPo/1KZcYDuyUCSXOAOQATJ/b8lKmowRzMzLrSqGae3igyEajCvPJO9DxliIh5wDyAlpaWHnfEFzXOp5k1n/52NF8LRSaC9cBBJe8nABt6UabPpk8axfc++Dr3EZj1Q9PGvozF5x/b6DCaWpGJ4G5gmqQpwO+A2cApZWUWAB9O/QdHAVtr3T/QafqkUay45K1FVG1mNqAVlggiol3Sh4GfkF0+ek1ErJJ0dlo+F1hIdunoarLLR88oKh4zM6us0PsIImIh2Y996by5JdMBnFNkDGZmVp2fPmpm1uScCMzMmpwTgZlZk3MiMDNrcsr6awcOSW3A2l6uPgZ4sobhDFbeT93zPsrH+6l79dpHkyJibKUFAy4R9IWkZRHR0ug4+jvvp+55H+Xj/dS9/rCP3DRkZtbknAjMzJpcsyWCeY0OYIDwfuqe91E+3k/da/g+aqo+AjMz21OznRGYmVkZJwIzsybXNIlA0vGSHpa0WtKFjY6nkSQ9Jul+SSslLUvz9pe0WNIj6e+okvIXpf32sKS3NS7yYkm6RtImSQ+UzOvxfpE0Pe3f1ZIul1RpAKYBqYt99BlJv0vfp5WSTihZ1oz76CBJt0l6UNIqSR9J8/vvdykiBv2L7DHYvwWmAiOAe4GDGx1XA/fHY8CYsnlfAC5M0xcC/5amD077ay9gStqPQxv9GQraL28AjgAe6Mt+Ae4CjiYbzGoRMLPRn63gffQZ4IIKZZt1H40DjkjTI4HfpH3Rb79LzXJGcCSwOiLWRMROYD4wq8Ex9TezgGvT9LXA35bMnx8Rz0fEo2RjRxzZgPgKFxF3AE+Vze7RfpE0Dtg3Iv4nsv/J15WsM+B1sY+60qz7aGNErEjT24EHycZi77ffpWZJBOOBx0ver0/zmlUAP5W0XNKcNO9PI40Ol/7+SZrf7Puup/tlfJounz/YfVjSfanpqLPJo+n3kaTJwOHAUvrxd6lZEkGldrVmvm72mIg4ApgJnCPpDVXKet9V1tV+acb9dSXwCuA1wEbgP9L8pt5HkvYBvgecFxHbqhWtMK+u+6lZEsF64KCS9xOADQ2KpQi4ANAAAAKtSURBVOEiYkP6uwm4iayp54l0Kkr6uykVb/Z919P9sj5Nl88ftCLiiYh4ISI6gKv4Y9Nh0+4jScPJksD1EfH9NLvffpeaJRHcDUyTNEXSCGA2sKDBMTWEpJdJGtk5DbwVeIBsf5yWip0G/CBNLwBmS9pL0hRgGlkHVrPo0X5Jp/zbJc1IV3icWrLOoNT545a8nez7BE26j9Jn+hrwYERcVrKo/36XGt3DXsee/BPIeu9/C1zc6HgauB+mkl2hcC+wqnNfAKOBnwOPpL/7l6xzcdpvDzOIru6osG9uJGva2EV2NHZmb/YL0EL2Y/hb4ArSHfyD4dXFPvoGcD9wH9mP2rgm30evJ2vCuQ9YmV4n9Ofvkh8xYWbW5JqlacjMzLrgRGBm1uScCMzMmpwTgZlZk3MiMDNrck4EZl2Q9Kselj9W0g+LisesKE4EZl2IiNc1OgazenAiMOuCpB3p77GSbpf0XUkPSbq+87nwaZyLhy
QtAd5Rsu7L0gPY7pZ0j6RZaf7lki5J02+TdIck/z+0hhrW6ADMBojDgUPInvXyS+AYZYP6XAUcR/bo4G+VlL8YuDUi/l7SfsBdkn5G9hz6uyXdCVwOnBDZM3rMGsZHImb53BUR69OP9kpgMvAq4NGIeCSyW/S/WVL+rcCFklYCtwN7AxMj4lngA8Bi4IqI+G0dP4NZRT4jMMvn+ZLpF/jj/52untEi4OSIeLjCsj8HNgMH1i48s97zGYFZ7z0ETJH0ivT+PSXLfgKcW9KXcHj6Owk4n6ypaaako+oYr1lFTgRmvRQRfwDmAD9KncVrSxZ/DhgO3JcGev9cyeOJL4hsTIgzgasl7V3n0M1246ePmpk1OZ8RmJk1OScCM7Mm50RgZtbknAjMzJqcE4GZWZNzIjAza3JOBGZmTe5/AR1cwdHd97FEAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df.CleanedAmount.sort_values(ignore_index=True).plot(style='.')\n", + "plt.title('Distribution of funding in USD')\n", + "plt.xlabel('index')\n", + "plt.ylabel('Funding amount in USD')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "\n", + "\n", + "There are some extreme values at the right. Let us see who are these very well funded startups.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SNoDateStartupNameIndustryVerticalSubVerticalCityInvestorsNameInvestmentTypeAmountInUSDRemarksyearyearmonthCleanedAmount
606127/08/2019Rapido Bike TaxiTransportationBike TaxiBangaloreWestbridge CapitalSeries B3,90,00,00,000NaN20192019-08-013.900000e+09
65165211/08/2017FlipkartE-CommerceOnline MarketplaceBangaloreSoftbankPrivate Equity2,50,00,00,000NaN20172017-08-012.500000e+09
83083118/05/2017PaytmE-CommerceMobile Wallet & ECommerce platformBangaloreSoftBank GroupPrivate Equity1,40,00,00,000NaN20172017-05-011.400000e+09
96696721/03/2017FlipkartE-CommerceECommerce MarketplaceBangaloreMicrosoft, eBay, Tencent HoldingsPrivate Equity1,40,00,00,000NaN20172017-03-011.400000e+09
313225/11/2019PaytmFinTechMobile WalletNCRVijay Shekhar SharmaFunding Round1,00,00,00,000NaN20192019-11-011.000000e+09
2648264928/07/2015Flipkart.comOnline MarketplaceNaNBangaloreSteadview Capital and existing investorsPrivate Equity70,00,00,000Late Stage, 10th Round More here20152015-07-017.000000e+08
2459246029/09/2015PaytmE-Commerce & M-Commerce platformNaNNCRAlibaba Group, Ant FinancialPrivate Equity68,00,00,000Late Stage (Alibaba @ 40% equity)20152015-09-016.800000e+08
18818930/08/2018True NorthFinancePrivate Equity FirmMumbaiNaNPrivate Equity60,00,00,000NaN20182018-08-016.000000e+08
333402/10/2019UdaanB2BBusiness developmentBangaloreAltimeter Capital, DST GlobalSeries D58,50,00,000NaN20192019-10-015.850000e+08
2244224518/11/2015OlaCar Aggregator & Retail Mobile AppNaNBangaloreBaillie Gifford, Falcon Edge Capital, Tiger Gl...Private Equity50,00,00,000Series F ( More Details Here)20152015-11-015.000000e+08
\n", + "
" + ], + "text/plain": [ + " SNo Date StartupName IndustryVertical \\\n", + "60 61 27/08/2019 Rapido Bike Taxi Transportation \n", + "651 652 11/08/2017 Flipkart E-Commerce \n", + "830 831 18/05/2017 Paytm E-Commerce \n", + "966 967 21/03/2017 Flipkart E-Commerce \n", + "31 32 25/11/2019 Paytm FinTech \n", + "2648 2649 28/07/2015 Flipkart.com Online Marketplace \n", + "2459 2460 29/09/2015 Paytm E-Commerce & M-Commerce platform \n", + "188 189 30/08/2018 True North Finance \n", + "33 34 02/10/2019 Udaan B2B \n", + "2244 2245 18/11/2015 Ola Car Aggregator & Retail Mobile App \n", + "\n", + " SubVertical City \\\n", + "60 Bike Taxi Bangalore \n", + "651 Online Marketplace Bangalore \n", + "830 Mobile Wallet & ECommerce platform Bangalore \n", + "966 ECommerce Marketplace Bangalore \n", + "31 Mobile Wallet NCR \n", + "2648 NaN Bangalore \n", + "2459 NaN NCR \n", + "188 Private Equity Firm Mumbai \n", + "33 Business development Bangalore \n", + "2244 NaN Bangalore \n", + "\n", + " InvestorsName InvestmentType \\\n", + "60 Westbridge Capital Series B \n", + "651 Softbank Private Equity \n", + "830 SoftBank Group Private Equity \n", + "966 Microsoft, eBay, Tencent Holdings Private Equity \n", + "31 Vijay Shekhar Sharma Funding Round \n", + "2648 Steadview Capital and existing investors Private Equity \n", + "2459 Alibaba Group, Ant Financial Private Equity \n", + "188 NaN Private Equity \n", + "33 Altimeter Capital, DST Global Series D \n", + "2244 Baillie Gifford, Falcon Edge Capital, Tiger Gl... 
Private Equity \n", + "\n", + " AmountInUSD Remarks year yearmonth \\\n", + "60 3,90,00,00,000 NaN 2019 2019-08-01 \n", + "651 2,50,00,00,000 NaN 2017 2017-08-01 \n", + "830 1,40,00,00,000 NaN 2017 2017-05-01 \n", + "966 1,40,00,00,000 NaN 2017 2017-03-01 \n", + "31 1,00,00,00,000 NaN 2019 2019-11-01 \n", + "2648 70,00,00,000 Late Stage, 10th Round More here 2015 2015-07-01 \n", + "2459 68,00,00,000 Late Stage (Alibaba @ 40% equity) 2015 2015-09-01 \n", + "188 60,00,00,000 NaN 2018 2018-08-01 \n", + "33 58,50,00,000 NaN 2019 2019-10-01 \n", + "2244 50,00,00,000 Series F ( More Details Here) 2015 2015-11-01 \n", + "\n", + " CleanedAmount \n", + "60 3.900000e+09 \n", + "651 2.500000e+09 \n", + "830 1.400000e+09 \n", + "966 1.400000e+09 \n", + "31 1.000000e+09 \n", + "2648 7.000000e+08 \n", + "2459 6.800000e+08 \n", + "188 6.000000e+08 \n", + "33 5.850000e+08 \n", + "2244 5.000000e+08 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[df.CleanedAmount.nlargest(10).index]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "\n", + "\n", + "* Rapido Bike Taxi looks like leading the pack by raising 3.9 Billion USD. But wait, this looks fishy. Infact Rapido raised 3.9 Billion INR and not USD. So this one is around 54 Million USD. 
This also shows that the data is not entirely accurate, so it should be used with caution.\n", + " \n", + "* The next four largest fundings are shared by Flipkart and Paytm (two each), which seems to be expected.\n", + "* Also, Swiggy raised 1 billion USD last year, which is not in the data.\n", + "\n", + "We will correct the amount for Rapido (rounded to 50 million USD) and then continue with the analysis.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3900000000.0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[60,'CleanedAmount']" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "50000000.0" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[60,'CleanedAmount'] = 50e6\n", + "df.loc[60,'CleanedAmount']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summean
year
20158.673022e+091.326150e+07
20163.828089e+096.532574e+06
20171.042931e+102.287129e+07
20185.122368e+091.932969e+07
20195.836577e+095.612093e+07
20203.902073e+085.574389e+07
\n", + "
" + ], + "text/plain": [ + " sum mean\n", + "year \n", + "2015 8.673022e+09 1.326150e+07\n", + "2016 3.828089e+09 6.532574e+06\n", + "2017 1.042931e+10 2.287129e+07\n", + "2018 5.122368e+09 1.932969e+07\n", + "2019 5.836577e+09 5.612093e+07\n", + "2020 3.902073e+08 5.574389e+07" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "amt_df = df.groupby('year').CleanedAmount.agg(['sum','mean'])\n", + "amt_df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "type": "bar", + "x": [ + 2015, + 2016, + 2017, + 2018, + 2019, + 2020 + ], + "y": [ + 8673022368, + 3828088608, + 10429309730, + 5122368369, + 5836576535.22, + 390207254 + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + 
"#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + 
], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" 
+ } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 
0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": 
"white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Total investments by year" + }, + "xaxis": { + "autorange": true, + "range": [ + 2014.5, + 2020.5 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + 0, + 10978220768.421053 + ], + "type": "linear" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(x = amt_df.index,\n", + " y= amt_df['sum']),\n", + " layout_title_text = 'Total investments by year')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "* Though 2016 is the year with most number of funding deals, it is the year with the lowest sum (2020 is yet to complete)\n", + "* 2017 has got the highest total amount of funding in the last 5 years. Out of the 10B in 2017, 5.5B is raised by Flipkart and PayTM in 3 deals which we can see in the table above the plot.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "type": "bar", + "x": [ + 2015, + 2016, + 2017, + 2018, + 2019, + 2020 + ], + "y": [ + 13261502.091743119, + 6532574.416382252, + 22871293.26754386, + 19329691.958490565, + 56120928.22326923, + 55743893.428571425 + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + 
"outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 
0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + 
"scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 
0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + 
"gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Avg. investments by year" + }, + "xaxis": { + "autorange": true, + "range": [ + 2014.5, + 2020.5 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + 0, + 59074661.28765182 + ], + "type": "linear" + } + } + }, + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA9MAAAHCCAYAAADsGJpSAAAgAElEQVR4nO3dT29c2Z3f4Xk9fBNc8y00F1wE4KIhZBVSgFcRHMDAgODKs5imY0AZcYLAgRPAaiABeuF4JojiP4rbst22xnAwgFuw0i3/TUuU2vbJwrl0sVT1u3XqkPzVqfs8wGchSmJTYvU99ysWyb8qAAAAQJW/yn4DAAAAoDfGNAAAAFQypgEAAKCSMQ0AAACVjGkAAACoZEwDAABAJWMaAAAAKhnTAAAAUMmYBgAAgErGNAAAAFQypgEAAKCSMQ0AAACVjGkAAACoZEwDAABAJWMaAAAAKhnTAAAAUMmYBgAAgErGNAAAAFQypgEAAKCSMQ0AAACVjGkAAACoZEwDAABAJWMaAAAAKhnTAAAAUMmYBgAAgErGNAAAAFQypgEAAKCSMQ0AAACVjGkAAACoZEwDAABAJWMaAAAAKhnTAAAAUMmYBgAAgErGNAAAAFQypgEAAKCSMQ0AAACVjGmo8IUvnZV33v1i9psBAAAk28ox/bVvfLPs7B6Und2D8smL32S/OQt9+atfLzu7B+Wjf/rn7DdlJR/90z+Xnd2D8q1HH2a/KSu5qbfXmAYAAErZ0jH9zrtfvBzTX/vGN7PfnIWM6ZtlTAMAADdp68b07Ih6590vGj7XxJj+M2MaAAAoZQvH9PAR31L+8nTv2Y/+fvLiN2Vn96B8+atfX/j7d3YPyhe+dHblZV/40tnlR7qHn28dasPbNvs09GGoDW/j0Ox4W+ftH/5OZt/+ed969OGVXzP75xuG6XzD2zC83fO/bvjvzL/uZW/7ordh0Z/vy1/9evg6x97eRX8nq35KwLL30ezf6Trvo3nRPwQNj79Zq7yPZ5+xseixNf9nHN7WTf+UCQAAyLB1Y3p2xCwbNbODe9Yw0GZHw6JRMwzhmxjTiwbOsiG46ts//2vn/0yL/tFh+G8Mryv6SO/wdi8alMtePv/0+0VPe59/G4a/i2XjefZtW+Xtjf5OloneR7N/zkVveymL3++LDO/L+bd/0Z9rlffx8LL5TytY9Otm//EIAABYbKvG9DBAZgfDouE0DJL5QfeFL51dGUTLhs91PIU4+sj0vPnx3Pr2D4N2ePuX
/Xe/9ejDlcf0ot//zrtfXPoR0tmXR6/7nXe/eGU4L/uIb83rXPY6Vvn8+ujvavaxt+wfDeb/PJFFf39f/urXF/5DyNj7eJlF/wDjqewAADBuq8b0ohGw7CN887920fhaNiqyx3Tr219KuTL0htcfPfV4nTG96sujj9bO/wPBdYzp4SO563xxumV/pkXjedn7aNUvOrfo72XsvzFr0Z9x9ivdzzb/D1DGNAAAxLZmTM9/Dut880Nx/iOJ8x/xK2X5R1Y3YUyv+vZHfydjn0M8O8Ruckwv+m8v+7ze6xjTy/5uVhm5NWN6/m2Y/4eBVcz+eYfHzNifY9H7ePb/j9nH3LJncxjTAAAQ25oxHX10c9nnr86OjWVPyd3UMb3K27/OeBsMI20YWbfxkelVXNeYnjU7NMfUPltheB+s+rTrebN/N4ueIr7q+3jZ/x/GNAAArGdrxnT0BaSWfY7xMDCWDY1lI3ZTxvTY27/qF7taNE7n/4zRGGwd0zV/n6uO6WVv7ycvfrPw6d2r/l0t+zONjdVFzxxYxewX0Wt9Hy96DBnTAACwnq0Y08vG8qxFY3v2I5KLBtqyrwY+/J7ZoTb82lVHyHWM6bG3v5TFX+l5eJ3D27/o1ywab8s+Ut86pmf/e/NPtf7Wow/X+gJky142/J0t+rU1X8171tjjb/j7XfcfX4a/m5b38aKvHTD77cWMaQAAqLMVY3rZR+1mLfr2T6X8ZRwt+3zZRZ+LvegLSWWM6VXe/tnfP9+ssZ+f/TPOD/jrGNOlLP4+0/N/tpoxveztXfR9qFd9Ovzwts///mgoR++/VSz66PGy/0b0Plz0vbl9ZBoAANazFWP6ti36dkKwyLJvkVVj2TMCAACAPMb0iHfe/WL4hcsgssqzJiLLvrUbAACQy5gesegpvS0fZWQ6ln3OfY1VP5cbAAC4XcY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZhu9PGnLyVJkiSpu2hjTDfK/h9AkiRJktaJNsZ0o+z/ASRJkiRpnWhjTDfK/h9AkiRJktaJNsZ0o+z/ASRJkiRpnWhjTDfK/h9AkiRJktaJNsZ0o+z/ASRJkiRpnWhjTDfK/h9AkiRJktaJNsZ0o+z/ASRJkiRpnWhjTDfK/h9AkiRJktaJNsZ0o+z/ASRJkiRpnWhjTDfK/h9AkiRJktaJNsZ0o+z/ASRJkiRpnWhjTDfK/h9AkiSpph/9/FX57997Vf5R3fTtJ6/Kx598lv7Y0fZFG2O6Ufb/AJIkSTV9+8nLcufu6/Iv/uUbddLf/t1F+cWntzumn71QT637fqaNMd0o+0CUJEmqyZjur9se0//1Hy7K3/9n9dJ/+Mar8uFPLtZ6X9PGmG6UfSBKkiTVZEz3122O6V98+rL87d95fPTUnbtv/vypAGu8v2ljTDfKPhAlSZJqMqb7y5hWlDGdx5hulH0gSpIk1WRM95cxrShjOo8x3Sj7QJQkSarJmO4vY1pRxnQeY7pR9oEoSZJUkzHdX8a0oozpPMZ0o+wDUZIkqSZjur+MaUUZ03mM6UbZB6IkSVJNxnR/GdOKMqbzGNONsg9ESZKkmozp/jKmFWVM5zGmG2UfiJIkSTUZ0/1lTCvKmM5jTDfKPhAlSZJq
Mqb7y5hWlDGdx5hulH0gSpIk1WRM95cxrShjOo8x3Sj7QJQkSarJmO4vY1pRxnQeY7pR9oEoSZJUkzHdX8a0oozpPMZ0o+wDUZIkqSZjur+MaUUZ03mM6UbZB6IkSVJNxnR/GdOKMqbzGNONsg9ESZKkmozp/jKmFWVM5zGmG2UfiJIkSTUZ0/1lTCvKmM5jTDfKPhAlSZJqMqb7y5hWlDGdx5hulH0gSpIk1WRM95cxrShjOo8x3Sj7QJQkSarJmO4vY1pRxnQeY7pR9oEoSZJUkzHdX8a0oozpPFs7ps/OH5ad3YO3mnV4dHL58sOjkys/t7d//NavL6WUnd2Dsrd/fPnj7ANRkiSpJmO6v4xpRRnTebZ6TM8P5Fn3Tu9f+fnDo5Ny7/T+5Y/39o/L4dFJOTt/+NbrNKYlSVKvGdP9ZUwrypjOM9kxvbd/XB4/eXr548dPnl4ZycPPz350emf3oLz/wSNjWpIkdZsx3V/GtKKM6TxbPaZnn949O4CfPX9RdnYPyrPnL5a+bBjT907vl7Pzh5fNj+lnL15KknQjffxp/tug7cuY7q/3HlyUj198diuPj49fvCzvPfD46Kk7d9+U7zx5tdb7mzZbO6bnHR6dXH6kumZMDy8fBvT8mP7TnyRJuplef/7H9LdB29eTn74xpjvrK+cXt3Y9eP35n8pXzj0+eurO3TflydPP13p/02YyY3r2Kds1Y7qUcvnR6VLeHtPZT9WSJEmqyUem+8vTvBXlad55JjmmS1n9c6bnGdOSJKnnjOn+MqYVZUzn2doxPTt4hx/PfrXuVb6atzEtSZK2LWO6v4xpRRnTebZ2TM9+D+md3YMrQ3nRr1n0faaNaUmStG0Z0/1lTCvKmM6ztWP6tmQfiJIkSTUZ0/1lTCvKmM5jTDfKPhAlSZJqMqb7y5hWlDGdx5hulH0gSpIk1WRM95cxrShjOo8x3Sj7QJQkSarJmO4vY1pRxnQeY7pR9oEoSZJUkzHdX8a0oozpPMZ0o+wDUZIkqSZjur+MaUUZ03mM6UbZB6IkSVJNxnR/GdOKMqbzGNONsg9ESZKkmozp/jKmFWVM5zGmG2UfiJIkSTUZ0/1lTCvKmM5jTDfKPhAlSZJqMqb7y5hWlDGdx5hulH0gSpIk1WRM95cxrShjOo8x3Sj7QJQkSarJmO4vY1pRxnQeY7pR9oEoSZJUkzHdX8a0oozpPMZ0o+wDUZIkqSZjur+MaUUZ03mM6UbZB6IkSVJNxnR/GdOKMqbzGNONsg9ESZKkmozp/jKmFWVM5zGmG2UfiJIkSTUZ0/1lTCvKmM5jTDfKPhAlSZJqMqb7y5hWlDGdx5hulH0gSpIk1WRM95cxrShjOo8x3Sj7QJQkSarJmO4vY1pRxnQeY7pR9oEoSZJUkzHdX8a0oozpPMZ0o+wDUZIkqSZjur+MaUUZ03mM6UbZB6IkSVJNxnR/GdOKMqbzGNONsg9ESZKkmozp/jKmFWVM5zGmG2UfiJIkSTUZ0/1lTCvKmM5jTDfKPhAlSZJqMqb7y5hWlDGdx5hulH0gSpIk1WRM95cxrShjOo8x3Sj7QJQkSarJmO4vY1pRxnQeY7pR9oEoSZJUkzHdX8a0oozpPMZ0o+wDUZIkqSZjur+MaUUZ03mM6UbZB6IkSVJNxnR/GdOKMqbzGNONsg9ESZKkmozp/jKmFWVM5zGmG2UfiJIkSTUZ0/1lTCvKmM5jTDfKPhAlSZJqMqb7y5hWlDGdx5hulH0gSpIk1WRM95cxrShjOo8x3Sj7QJQkSarJmO4vY1pRxnSeSYzp9z94VHZ2D8rjJ0+vvPzw6KTs7B6Und2Dcnh0cuXn9vaPy87uwVuva2f3oOztH1/+OPtAlCRJqsmY7i9jWlHGdJ6tH9Pvf/DochjPjul7p/evDOjDo5Ny7/T+5Y/39o/L4dFJOTt/ePmys/OH5fDoxJiWJEndZkz3lzGtKGM6z1aP6WFIl1LeGtN7+8dXfvz4ydMr
I3n4+dmPTu/sHlx5naUY05Ikqa+M6f4yphVlTOfZ2jE9P3pnx/Sz5y/Kzu5Befb8xeXPz79sGNP3Tu+Xs/OHlxnTkiSp54zp/jKmFWVM59nKMT0/eEtZf0wPLx9e3/zrfvG7C0mT7bV0o336W48zXX/f/eErY7qzzh5clE9++6rcxtn2yW8vytkDj4+eunP3Tfnej9Z7f9NmK8f0vdP7l19YbL73P3hUNaaH1zd87vT8mH558QdJk+1z6Ub73cs36W+Dtq/vf3RhTHfW2YOL/389uPmz7XcvPzemO+vO3Tfl+x+t9/igzVaO6UXW/ZzpeZ7mLUmSes7TvPvL07wV5WneeSY7plf5at7GtCRJ2raM6f4yphVlTOeZ7JguZfz7TBvTkiRp2zKm+8uYVpQxnWcyY/qmZB+IkiRJNRnT/WVMK8qYzmNMN8o+ECVJkmoypvvLmFaUMZ3HmG6UfSBKkiTVZEz3lzGtKGM6jzHdKPtAlCRJqsmY7i9jWlHGdB5julH2gShJklSTMd1fxrSijOk8xnSj7ANRkiSpJmO6v4xpRRnTeYzpRtkHoiRJUk3GdH8Z04oypvMY042yD0RJkqSajOn+MqYVZUznMaYbZR+IkiRJNRnT/WVMK8qYzmNMN8o+ECVJkmoypvvLmFaUMZ3HmG6UfSBKkiTVZEz3lzGtKGM6jzHdKPtAlCRJqsmY7i9jWlHGdB5julH2gShJklSTMd1fxrSijOk8xnSj7ANRkiSpJmO6v4xpRRnTeYzpRtkHoiRJUk3GdH8Z04oypvMY042yD0RJkqSajOn+MqYVZUznMaYbZR+IkiRJNRnT/WVMK8qYzmNMN8o+ECVJkmoypvvLmFaUMZ3HmG6UfSBKkiTVZEz3lzGtKGM6jzHdKPtAlCRJqsmY7i9jWlHGdB5julH2gShJklSTMd1fxrSijOk8xnSj7ANRkiSpJmO6v4xpRRnTeYzpRtkHoiRJUk3GdH8Z04oypvMY042yD0RJkqSajOn+MqYVZUznMaYbZR+IkiRJNRnT/WVMK8qYzmNMN8o+ECVJkmoypvvLmFaUMZ3HmG6UfSBKkiTVZEz3lzGtKGM6jzHdKPtAlCRJqsmY7i9jWlHGdB5julH2gShJklSTMd1fxrSijOk8xnSj7ANRkiSpJmO6v4xpRRnTeYzpRtkHoiRJUk3GdH8Z04oypvMY042yD0RJkqSajOn+MqYVZUznMaYbZR+IkiRJNRnT/WVMK8qYzmNMN8o+ECVJkmoypvvLmFaUMZ3HmG6UfSBKkiTVZEz3lzGtKGM6jzHdKPtAlCRJqsmY7i9jWlHGdB5julH2gShJklSTMd1fxrSijOk8Wzumz84flp3dg8sOj07e+jWHRydLf35v/7js7B689Xt2dg/K3v7x5Y+zD0RJkqSajOn+MqYVZUzn2doxPT+OD49Oyr3T+5c/vnd6/8qvmf/5vf3jcnh0Us7OH16+7Oz8YTk8OjGmJUlStxnT/WVMK8qYzrO1Y3reMIQHe/vH5fGTp5c/fvzk6ZWRPPz87Eend3YPyvsfPDKmJUlStxnT/WVMK8qYzjOZMb23f3z5kednz1+Und2D8uz5i8ufn3/ZMKbvnd4vZ+cPLzOmJUlSzxnT/WVMK8qYzrP1Y3r43OfZj0rXjOnh5cOAnh/Tn118LknSjfT7l/lvg7av//XRhTHdWWcPLspvX76+lcfHb1++KWcPPD566s7dN+X7H633+KDN1o/pweznSNeM6eH3Dp87PT+mf/1/30iSdCP96vf5b4O2r+/92JjurbMHF+XT37++lcfHp783pnvrzt035fGP13t80GYyY3r+859X/ZzpeZ7mLUmSes7TvPvL07wV5WneebZ2TM8O3lL+/NW6Z5/qvcpX8zamJUnStmVM95cxrShjOs/WjunZ7yG97veZNqYlSdK2ZUz3lzGtKGM6z9aO6duSfSBKkiTVZEz3lzGtKGM6jzHdKPtAlCRJqsmY7i9jWlHG
dB5julH2gShJklSTMd1fxrSijOk8xnSj7ANRkiSpJmO6v4xpRRnTeYzpRtkHoiRJUk3GdH8Z04oypvMY042yD0RJkqSajOn+MqYVZUznMaYbZR+IkiRJNRnT/WVMK8qYzmNMN8o+ECVJkmoypvvLmFaUMZ3HmG6UfSBKkiTVZEz3lzGtKGM6jzHdKPtAlCRJqsmY7i9jWlHGdB5julH2gShJklSTMd1fxrSijOk8xnSj7ANRkiSpJmO6v4xpRRnTeYzpRtkHoiRJUk3GdH8Z04oypvMY042yD0RJkqSajOn+MqYVZUznMaYbZR+IkiRJNRnT/WVMK8qYzmNMN8o+ECVJkmoypvvLmFaUMZ3HmG6UfSBKkiTVZEz3lzGtKGM6jzHdKPtAlCRJqsmY7i9jWlHGdB5julH2gShJklSTMd1fxrSijOk8xnSj7ANRkiSpJmO6v4xpRRnTeYzpRtkHoiRJUk3GdH8Z04oypvMY042yD0RJkqSajOn+MqYVZUznMaYbZR+IkiRJNRnT/WVMK8qYzmNMN8o+ECVJkmoypvvLmFaUMZ3HmG6UfSBKkiTVZEz3lzGtKGM6jzHdKPtAlCRJqsmY7i9jWlHGdB5julH2gShJklSTMd1fxrSijOk8xnSj7ANRkiSpJmO6v4xpRRnTeYzpRtkHoqTp9dHPL8o/fvdV+YfvqJf+5w9ell98cjs3wtJYxnR/GdOKMqbzGNONsg9ESdPLjXB/3eaNsDSWa0h/GdOKMqbzGNONsg9ESdPLjXB/GdPapFxD+suYVpQxnceYbpR9IEqaXm6E+8uY1iblGtJfxrSijOk8xnSj7ANR0vRyI9xfxrQ2KdeQ/jKmFWVM5zGmG2UfiJKmlxvh/jKmtUm5hvSXMa0oYzqPMd0o+0CUNL3cCPeXMa1NyjWkv4xpRRnTeYzpRtkHoqTp5Ua4v4xpbVKuIf1lTCvKmM5jTDfKPhAlTS83wv1lTGuTcg3pL2NaUcZ0nq0d0/dO75ed3YPLDo9O3vo1h0cnS39+b/+47OwevPV7dnYPyt7+8eWPsw9ESdPLjXB/GdPapFxD+suYVpQxnWdrx/Ts4B1+fHb+8PLH907vXxnQh0cn5d7p/Su//vDo5MrvOTt/WA6PToxpSam5Ee4vY1qblGtIfxnTijKm82ztmJ43DOHB3v5xefzk6eWPHz95emUkDz8/+9Hpnd2D8v4Hj4xpSam5Ee4vY1qblGtIfxnTijKm80xmTM9+lPnZ8xdlZ/egPHv+4vLn5182jOl7p/fL2fnDy4xpSdm5Ee4vY1qblGtIfxnTijKm80xiTJ+dP7zyEeaaMT28fBjQ82P6j3/8kyTdak9++tqNcGd95fyiXLz5Q/X7+tXr+t8jjeUa0l/rXkPW6eLNH8tXzj0+eurO3TflydM3a72/abP1Y/r9Dx6NDudFL5t9Gvjw0enh9c2O6WcvXkrSreajSv313oOL8vGLz6rf1x9/mv940/blGtJf615D1unjFy/Lew88Pnrqzt035TtPXq31/qbNVo/p+Y9Iz1r1c6bneZq3pOzcCPeXp3lrk3IN6S9P81aUp3nn2doxfXh0svDbYQ1W+WrexrSkTcyNcH/d7o3wZ+Wjn78qP3iqnvrZL9a7EXYNmUbGtKKM6TxbOaaHp2wvanYgj32faWNa0ibmRri/bvtG+N/++4ty5+5rddK/+tevy3d+aExrc64hxnRfGdN5tnJM36bsm2pJ08uNcH+5EVZUy42wa8g0cg1RlDGdx5hulH1TLWl6uRHuLzfCijKmNZZriKKM6TzGdKPsm2pJ08uNcH+5EVaUMa2xXEMUZUznMaYbZd9US5peboT7y42wooxpjeUaoihjOo8x3Sj7plrS9HIj3F9uhBVlTGss1xBFGdN5jOlG2TfVkqaXG+H+ciOsKGNaY7mGKMqYzmNMN8q+qZY0vdwI95cbYUUZ0xrLNURRxnQeY7pR9k21pOnlRri/3AgrypjWWK4hijKm
8xjTjbJvqiVNLzfC/eVGWFHGtMZyDVGUMZ3HmG6UfVMtaXq5Ee4vN8KKMqY1lmuIoozpPMZ0o+ybaknTy41wf7kRVpQxrbFcQxRlTOcxphtl31RLml5uhPvLjbCijGmN5RqiKGM6jzHdKPumWtL0ciPcX26EFWVMayzXEEUZ03mM6UbZN9WSppcb4f5yI6woY1pjuYYoypjOY0w3yr6pljS93Aj3lxthRRnTGss1RFHGdB5julH2TbWk6eVGuL/cCCvKmNZYriGKMqbzGNONsm+qJU0vN8L95UZYUca0xnINUZQxnceYbpR9Uy1perkR7i83wooypjWWa4iijOk8xnSj7JtqSdPLjXB/uRFWlDGtsVxDFGVM5zGmG2XfVEuaXm6E+8uNsKKMaY3lGqIoYzqPMd0o+6Za0vRyI9xfboQVZUxrLNcQRRnTeYzpRtk31ZKmlxvh/nIjrChjWmO5hijKmM5jTDfKvqmWNL3cCPeXG2FFGdMayzVEUcZ0HmO6UfZNtaTp5Ua4v9wIK8qY1liuIYoypvMY042yb6olTS83wv3lRlhRxrTGcg1RlDGdx5hutM6D9tmnL8uzT1+po27rBkdaJTfC/eVGWFHGtMZyDVGUMZ3HmG60zoP2J//7Zflv374o33ykXvofH74qv7ilmxxpLDfC/eVGWFHGtMZyDVGUMZ3HmG7kEJtGt3mISWO5hvSXG2FFGdMayzVEUcZ0HmO6kUNsGhnT2qRcQ/rLjbCijGmN5RqiKGM6jzHdyCE2jYxpbVKuIf3lRlhRxrTGcg1RlDGdx5hu5BCbRsa0NinXkP5yI6woY1pjuYYoypjOY0w3cohNI2Nam5RrSH+5EVaUMa2xXEMUZUznMaYbOcSmkTGtTco1pL/cCCvKmNZYriGKMqbzGNONHGLTyJjWJuUa0l9uhBVlTGss1xBFGdN5jOlGDrFpZExrk3IN6S83wooypjWWa4iijOk8xnQjh9g0Mqa1SbmG9JcbYUUZ0xrLNURRxnQeY7qRQ2waGdPapFxD+suNsKKMaY3lGqIoYzrP1o/p9z94VPb2jxf+3OHRSdnZPSg7uwfl8Ojkys/t7R+Xnd2Dt37Pzu7BldfnEJtGxrQ2KdeQ/nIjrChjWmO5hijKmM6ztWP68ZOnl0N50Zi+d3r/yoA+PDop907vX/54b/+4HB6dlLPzh5cvOzt/WA6PTozpCWZMa5NyDekvN8KKMqY1lmuIoozpPFs7pgfLPjK9t39cHj95evnjx0+eXvl1w8/PfnR6Z/fgrdfnEJtGxrQ2KdeQ/nIjrChjWmO5hijKmM4zyTH97PmLsrN7UJ49f7H0ZcOYvnd6v5ydP7xs/vX9n1+/qu47T145xDrrvQcX5Ze/ernW+1tb3G9y+s4PXUN6670HF+WXv35Z/b5+vsbv+eWvX5X3Hnh89NSdu2/Kd3944Rqipd3mfcgvf+Ua0luX15A13t+0MaaXvGwY08PLh9cx//pef/7H6j78yWuHWGednV+Uzy4+X+v9rS3uTU6uIf11eQ2pfF9/9mqN33Pxh3J27vHRU3fuvik/+Mkb1xAt7TbvQ1xD+uvyGrLG+5s2xvSSl80+DXz46PSi1+fpVdPI07y1SbmG9JenaCrK07w1lmuIojzNO88kx3Qpq3/O9Njrc4hNI2Nam5RrSH+5EVaUMa2xXEMUZUznmeyYXuWreRvTGjKmtUm5hvSXG2FFGdMayzVEUcZ0nq0d07PfGmto9ttclTL+faaNaQ0Z09qkXEP6y42wooxpjeUaoihjOs/Wjunb4hCbRsa0NinXkP5yI6woY1pjuYYoypjOY0w3cohNI2Nam8W5Cy8AAAkFSURBVJRrSH+5EVaUMa2xXEMUZUznMaYbOcSmkTGtTco1pL/cCCvKmNZYriGKMqbzGNONHGLTyJjWJuUa0l9uhBVlTGss1xBFGdN5jOlGDrFpZExrk3IN6S83wooypjWWa4iijOk8xnQjh9g0Mqa1SbmG
9JcbYUUZ0xrLNURRxnQeY7qRQ2waGdPapFxD+suNsKKMaY3lGqIoYzqPMd3IITaNjGltUq4h/eVGWFHGtMZyDVGUMZ3HmG7kEJtGxrQ2KdeQ/nIjrChjWmO5hijKmM5jTDdyiE0jY1qblGtIf7kRVpQxrbFcQxRlTOcxphs5xKbR7Y/pV+XjF+qqW3tsuIb0mBthRRnTGss1RFHGdB5jupFDbBrd9pj+L9+6KP/ua6/VSX//ny7Khz91I6zNuIa4Ee4vY1pjuYYoypjOY0w3cohNI4eYotwIayzXEEW5hmgs1xBFGdN5jOlGDrFp5BBTlBthjeUaoijXEI3lGqIoYzqPMd3IITaNHGKKciOssVxDFOUaorFcQxRlTOcxphs5xKaRQ0xRboQ1lmuIolxDNJZriKKM6TzGdCOH2DRyiCnKjbDGcg1RlGuIxnINUZQxnceYbuQQm0YOMUW5EdZYriGKcg3RWK4hijKm8xjTjRxi08ghpig3whrLNURRriEayzVEUcZ0HmO6kUNsGjnEFOVGWGO5hijKNURjuYYoypjOY0w3cohNI4eYotwIayzXEEW5hmgs1xBFGdN5jOlGDrFp5BBTlBthjeUaoijXEI3lGqIoYzqPMd3IITaNHGKKciOssVxDFOUaorFcQxRlTOcxphs5xKaRQ0xRboQ1lmuIolxDNJZriKKM6TzGdCOH2DRyiCnKjbDGcg1RlGuIxnINUZQxnceYbuQQm0YOMUW5EdZYriGKcg3RWK4hijKm8xjTjRxi08ghpig3whrLNURRriEayzVEUcZ0HmO6kUNsGjnEFOVGWGO5hijKNURjuYYoypjOY0w3cohNI4eYotwIayzXEEW5hmgs1xBFGdN5jOlGDrFp5BBTlBthjeUaoijXEI3lGqIoYzqPMd3IITaNHGKKciOssVxDFOUaorFcQxRlTOcxphs5xKaRQ0xRboQ1lmuIolxDNJZriKKM6TzGdCOH2DRyiCnKjbDGcg1RlGuIxnINUZQxnceYbuQQm0YOMUW5EdZYriGKcg3RWK4hijKm8xjTjRxi08ghpig3whrLNURRriEayzVEUcZ0HmO6kUNsGjnEFOVGWGO5hijKNURjuYYoypjOY0yv4PDopOzsHpSd3YNyeHRy5eccYtPIIaYoN8IayzVEUa4hGss1RFHGdB5jesS90/tXBvTh0Um5d3r/8scOsWnkEFOUG2GN5RqiKNcQjeUaoihjOo8xPWJv/7g8fvL08sePnzwte/vHlz92iE0jh5ii3AhrLNcQRbmGaCzXEEUZ03mM6cCz5y/Kzu5Befb8xdKXOcSmkUNMUW6ENZZriKJcQzSWa4iijOk8xnRglTG9jh/97PPy13/zuvybU/XSf3x4Ud784Y/X8bAa9eYPfypfe3iR/mfW6v3137wuP/7Z57fy+CjFNaTHXEMU5RqisVxDFHXb1xD+wpgO3NSYBgAAoG/G9Iixz5kGAABgeozpEWNfzRsAAIDpMaZXEH2faQAAAKbHmOYt907vX/7jwbJ/QFjlHxje/+DRwqfEn50/vPL6h+jHTT9GBrP/jfc/eHRtbz836yYfH8PXrXAN6dttXEP29o89Pjp1248PHyjpT+tj5LoeY2BM85b5g2dv/7icnT+8/PHYU98fP3l6efFZNqZdlPp204+RYTAZ0H266cfHvLPzhz79pjM3/RiZ//Xzr4/NdtOPj/nXt7d/7BrSmdbHSOvvh4Exzaj58bvqF2WLPjLtpma7XPdj5N7p/SuHGn277sfHPN9hoX/X/RjZ2z++8o9xqz6W2EzX+fgYhvbYy+jLuo+R6/r9TJcxzajDo5PLYVPz7cJWfZq3i1P/rvsxMjwuZh8nxlK/rvvxMctHpbfDTZ0zw+ucff305zofH4uGs2972r91HyPX9fuZLmOa0HBDMrjuG+FS/nwB85Hqfl33Y2T4tbP/Ijz/36AfN30NcXPTv5t4jAy/fvYf5ejTTTw+Zv+hJfr99KHl
MXIdv59pM6ZZ6v0PHq10MWkd055e1a+beIws+7XzA5vNd9PXkHun931UunM39RjxD3Lb4aYeH8u+kCH9uY7HSMvvB2OahaIbj+v+fEdjuk83+RhZNJyN6b7c9DXEjU3/buox4kZ4O9zmfYhPF+lT62PkOh5jYEzzlrGnXa/6FQ6jLwwz/2OHWF9u+jFy7/T+lZefnT90iHXkph8fw+tw3ejXTT9GdnYPrvx615C+3MY1ZDD8g75/aOlL62Pkuh5jYExzRfQ9XGf/hS763nuz35JiaPZzk2Z/7/wND5vvNh4jpVz9HpBugvtxG4+PRU/Lox+3dQ2Z/TnXkH7c5jXE07v71PoYuY7HGAyMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAqGdMAAABQyZgGAACASsY0AAAAVDKmAQAAoJIxDQAAAJWMaQAAAKhkTAMAAEAlYxoAAAAq/T9klkIyY8fgoAAAAABJRU5ErkJggg==", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(x = amt_df.index,\n", + " y= amt_df['mean']),\n", + " layout_title_text = 'Avg. investments by year')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "* When it comes to the mean value of funding, 2020 leads the pack with an average of 55 Million USD.\n", + "* But the year has just started, should the mean funding of 2020 be considered or there is something we are missing? Check the number of funds raised in the year 2020, it is pretty less.\n", + "* We will consider 2019 data as valid data for mean funding." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task3: Investment Type\n", + "\n", + "Now let us explore the investment type of the funding deals like whether it is seed funding, private equity funding or so on.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Can we get an idea about the number and value of funding deals with respect to the investment type?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "43" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.InvestmentType.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Seed Funding 1393\n", + "Private Equity 1357\n", + "Seed Angel Funding 141\n", + "Debt Funding 25\n", + "Series A 24\n", + "Series B 20\n", + "Series C 14\n", + "Series D 12\n", + "PreSeries A 8\n", + "Seed 4\n", + "Name: InvestmentType, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top10_inv_type = df.InvestmentType.value_counts()[:10]\n", + "top10_inv_type \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://stackoverflow.com/a/509295/8210613 [start : stop : step ]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "orientation": "h", + "type": "bar", + "x": [ + 4, + 8, + 12, + 14, + 20, + 24, + 25, + 141, + 1357, + 1393 + ], + "y": [ + "Seed", + "PreSeries A", + "Series D", + "Series C", + "Series B", + "Series A", + "Debt Funding", + "Seed Angel Funding", + "Private Equity", + "Seed Funding" + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": 
"white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, 
+ "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + 
"scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + 
"#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + 
"zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 10 Investment types" + }, + "xaxis": { + "autorange": true, + "range": [ + 0, + 1466.3157894736842 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + -0.5, + 9.5 + ], + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(y = top10_inv_type.index[::-1],\n", + " x= top10_inv_type.values[::-1],\n", + " orientation='h'),\n", + " layout_title_text = 'Top 10 Investment types')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight: \n", + "\n", + "\n", + "* Seed funding tops the chart closely followed by Private Equity and seed angel funding\n", + " \n", + "* We can clearly see the decreasing number of deals as we move up the stages of funding rounds like Series A, B, C & D\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sizesummean
InvestmentType
Seed Funding13937.757209e+081.038448e+06
Private Equity13572.672787e+102.493271e+07
Seed Angel Funding1412.256960e+082.051782e+06
Debt Funding251.509204e+086.288348e+06
Series A242.032000e+089.236364e+06
Series B209.491957e+084.745979e+07
Series C141.044718e+097.462274e+07
Series D121.481799e+091.234832e+08
PreSeries A84.137200e+075.910286e+06
Seed45.280000e+071.320000e+07
\n", + "
" + ], + "text/plain": [ + " size sum mean\n", + "InvestmentType \n", + "Seed Funding 1393 7.757209e+08 1.038448e+06\n", + "Private Equity 1357 2.672787e+10 2.493271e+07\n", + "Seed Angel Funding 141 2.256960e+08 2.051782e+06\n", + "Debt Funding 25 1.509204e+08 6.288348e+06\n", + "Series A 24 2.032000e+08 9.236364e+06\n", + "Series B 20 9.491957e+08 4.745979e+07\n", + "Series C 14 1.044718e+09 7.462274e+07\n", + "Series D 12 1.481799e+09 1.234832e+08\n", + "PreSeries A 8 4.137200e+07 5.910286e+06\n", + "Seed 4 5.280000e+07 1.320000e+07" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top10_inv_type_amt = df.groupby('InvestmentType').CleanedAmount.agg(['size','sum','mean']).sort_values('size',ascending=False)[:10]\n", + "top10_inv_type_amt" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "orientation": "h", + "type": "bar", + "x": [ + 52800000, + 41372000, + 1481799000, + 1044718394, + 949195735, + 203200000, + 150920354, + 225696010, + 26727868165, + 775720908 + ], + "y": [ + "Seed", + "PreSeries A", + "Series D", + "Series C", + "Series B", + "Series A", + "Debt Funding", + "Seed Angel Funding", + "Private Equity", + "Seed Funding" + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + 
"gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + 
"outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": 
"scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + 
"#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 
2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 10 Investment types against their amount" + }, + "xaxis": { + "autorange": true, + "range": [ + 0, + 28134598068.42105 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + -0.5, + 9.5 + ], + "type": "category" + } + } + }, + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA9MAAAHCCAYAAADsGJpSAAAgAElEQVR4nO3dXW8k2X2YcX0efgle8yuIF7wQwAsh0G4QkEkmMQxijSgIOEwujE0C5gVEQsOAEV/Y5I3Ai2CRIEygUNHSGmm50qxXiUYe25MYhu0ZcmSfXKyLW11dXedfPDzsJuv3AA8w7O7qPv1SNf3wVBW/kQAAAAAAwCi+sewBAAAAAADw2BDTAAAAAACMREwDAAAAADASMQ0AAAAAwEjENAAAAAAAIxHTAAAAAACMREwDAAAAADASMQ0AAAAAwEjENAAAAAAAIxHTAAAAAACMREwDAAAAADASMQ0AAAAAwEjENAAAAAAAIxHTAAAAAACMREwDAAAAADASMQ0AAAAAwEjENAAAAAAAIxHTAAAAAACMREwDAAAAADASMQ0AAAAAwEjENAAAAAAAIxHTAAAAAACMREwDAAAAADASMQ0AAAAAwEjENAAAAAAAIxHTAAAAAACMREwDAAAAADASMQ0AAAAAwEjENAAAAAAAIxHTAAAAAACMREwDAAAAADASMQ0AAAAAwEjENAAAAAAAIxHTAAAAAACMREwDAAAAADASMQ0AAAAAwEjENABUYm19K/3mv/vdZQ8DAAAAFXgSMb22vhXyd37/Py9lfJ+c/zCtrW8N3uZ3fv8/z4z1m9/+KHz/3/z2R3PP9U/e/FnhqB+GZ989HPVcl81dxiuo7o/ms16LZj0cs/4MfSbu+71vtiXL2pY9Rh7bNgYAADwenkRMd/nNf/e7KxWUTeAuom+8Y78A/vjzLx/ll+zH9kVXTC8XMS2mx/LYtjEAAODxIKYr0cRt2z7+5M2f9X45bi7/5PyHox7vsX3JfmxfdMX002bVYxrjeWzbGAAA8HiYXExHdqduf6Hu7kJ9l0BvxtNHM9P048+/nLvum9/+KD377mHoMRbFdHP5J+c/vB1HYzvUI69Ze4xN7Lftew7dx2w/xrPvHvbujt9c34RI9z1rnuPQ8+m+jkO7+0ff79x4++i7/Te//dHt67cotNbWt27f++h7eJ/vzRB9yw0t271d8zq2b993qELfutkXRs3r2KxLjX2vbfc27dew+znLfa6a8UQ+w5Gx9b223fW//VloaH9+2+MZGnffZ2TRuO5rPcy9rn3PrT3W9npbc50FAACIMqmYbr5Y5S5rf2ns+2J21/H0MTQTNmZGJRfTQ1/AU1o8Q57SV6HT/pLdt6tpc1n7Mfper29++6OZ55Sb1Vv0OIsu7wv+dpD0RezQ+90d233OTC/6nHbfm9x72H7O9/neLKIJq9xzWfQLg0Ux3Q3+vvEsiunu4wxFZ/txuuOuMTMdGVvzfHPvydDzWvRLkz6a96bvsu77Vboe9m2bhi4bE9O11lkAAIAIk4npRV/UUpqPnUVfqIeCMzKe6Fgb7jOmI19Q+wKrWb795XgoDtvjXXS77mOO2UU2t1t85L6bL/y5gOoLg/uM6UXPpfvLi6HPbnfvhft8b8bQd1jCotcqGqzd92nRfS56Lt3XZujzUDOmI2PLbXOa1zU3M11K93Oy6Dncx3q46JdGY2em29zXOgsAABBhMjE9ZgZ46LZjdr3ujic61kXjGuI+Yrrvtt0xDIVd87o1NDNtQ4F2lxCJXr7odt1fECwrpvvur++XF0Ovefuzdd/vzRB95wTo3t9QsA/NyHfNvf7RYG1eq6H1d1kxnbuP5nW975hetBt05Dnc13o49IuClMQ0AABYXSYT00NB252RfciYfqjdvCNfUFOafX59s419x5x2zR0LG/2iW/IlftExoW27x8ouI6a778+z7x6GjpNtaH+27vu9WUSzXHs83c/T0F4ciw4x6I4v+vpHg7X9ei36BcCyYrrvvWjb3Md9xXTz2nbH3betuo/1sO9z0N1miWkAAPDYmExM39fM9FAY5cbTx0OegKzNoi+57efenc1s31/02My+x2zfZ+2Z6cjM6zJjurnPZ989XHgG96G
Ybo/nvt+bPqKfpzExXfr6j4npvtu0H2OZM9ORdf2+YnrRmO87poduZ2YaAAA8diYT0/dxzPTQfUTG00ftP401NqZT+vr16B67271+iCbGu3Rf275jNHOPE708+iV6zBfzofEuYtHr2H2cRfc95rN7n+/N0Fi7wd73eVoUs32fgb71475jOhJ0fcdp57jLZzh6zHRuvGOWHXr8hhoxvexjpu+yzgIAAESYTEyn1H8W4aGzeXeDYW19+HjL3HjGjHfsbMp9xnR7V9i+L+h9Z4xu7rMZc9+ZtFOaPzvx0Mx86Zf4RWNoxnGXmdGh8S4i9/4341kUzIve277P7n2+N330nfG5PaudO4t4+1jr5vXuu117l/X7ium+s2V31727nGTwLp/hvpjtG18zxvs+AdnQoTD3HdPNmPsuGzrMJKXFn63a6ywAAECEScV0SuP+znT3OMYxu3cvOkHTovuJjGsRfcdb3mW2pzv2oV8cLDomue+EZm2HZsO6Yy/9Ej/0+kRmBRd9CV803iHaY4juXtvQvI7d57HoM3Kf703k/tt/N7v7eeoex/3su4e9hw/0/R3mGrt5970uXbpjiewdMvYznJsZXjTG+zwBWfex2n/HvM19rYfd5xT5e9SLPlsPsc4CAADkeJIxXcp9/qkZIEduNvSuhxesKna7BQAAwFNATPcgpvGQDO1JkdLjjen2ruUNj/W5AAAAAF3EdA9iGg9F3zHIXR5rgC7a3dyxqwAAAHgKiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGIaAAAAAICRiGkAAAAAAEYipgEAAAAAGImYBgAAAABgJGJ6gvziT/+KXOj1+79Of/xn75Y+Dq6ur/70r9Lf2JYw45s/v05vr3+19HFwtf2Lt+/T//uLm6WPg6vtr/76b9If/d+3Sx/HUxRliOkJsuyVlqutmGZOMc2IYpoRxTQjiul6ogwxPTE+e3mTLl68vTf/54u36epLG7enpJhmTjHNiGKaEcU0I4rpeqIMMT0x9j++Th88u7k3//7eTfofn9q4PSXFNHOKaUYU04wophlRTNcTZYjpifHR8+v0re/c3Jt/Z/cm/fcf2rg9JcU0c4ppRhTTjCimGVFM1xNliOmJIaaZU0wzp5hmRDHNiGKaEcV0PVGGmJ4YYpo5xTRzimlGFNOMKKYZUUzXE2WI6YkhpplTTDOnmGZEMc2IYpoRxXQ9UYaY/ltOz87TxubusoeRUpody+nZeVpb37q3+xbTzCmmmVNMM6KYZkQxzYhiup4oY6Vjem19a8btnf1qj5WL6e2d/Qcbj5jmMhXTzCmmGVFMM6KYZkQxXU+UsbIxvbG5mw6PT2YuW3ZMd8ezrLGUIKaZU0wzp5hmRDHNiGKaEcV0PVHGSsb0q9dv0tr6Vnr1+s3g7Q6PT2Zmiru3z13fnW2+a0xvbO6m07Pz25+7Mdxcf5exXFxezcxMl9xXSmKaecU0c4ppRhTTjCimGVFM1xNlrGRMp/RVNA7NRB8en8xEYjdgc9fvHRzN3H/JzHQkpttB3H3sobH0xfRd7yslMc28Ypo
5xTQjimlGFNOMKKbriTJWNqZTmj9muh2sG5u76eLyau72zWW569v/Tulux0w3M8LRmelF1w+NZdHM9F3uKyUxzbximjnFNCOKaUYU04wopuuJMlY6pts0uza3Y7jPyPV9u5HXnpledH1uLGNiOvK8xDRzimnmFNOMKKYZUUwzopiuJ8p4NDGd0mxIdmdgu4y9flkxnRuLmWk+tGKaOcU0I4ppRhTTjCim64kyVjKmLy6v5o6X7v6JqL2Do7n43Ts4up2VzV2/vbOf9g6Obq/b3tm/c0x372tjc3dUTA+NZWxM556XmGZOMc2cYpoRxTQjimlGFNP1RBkrGdMp9R+j3KV7tu5uDOeub1/XF9/d8SyK6Wb36sbuyc9yATw0lrEx3Xdf7V9MiGnmFNPMKaYZUUwzophmRDFdT5SxsjGN+2Hv4GjmlwBimjnFNHOKaUYU04wophlRTNcTZYjpJ0Z3Nr67u7yYZk4xzZximhHFNCOKaUYU0/VEGWJ6Yohp5hTTzCmmGVFMM6KYZkQxXU+UIaYnhphmTjHNnGKaEcU0I4ppRhTT9UQZYnpiiGnmFNPMKaYZUUwzophmRDFdT5QhpifG84+v04fPbu7Nf7B3k/7HpzZuT0kxzZximhHFNCOKaUYU0/VEGWJ6Ylx98T5dvHh3f/74Xbr63zZuT0kxzZximhHFNCOKaUYU0/VEGWJ6gix7peVqK6aZU0wzophmRDHNiGK6nihDTE+QZa+0XG3FNHOKaUYU04wophlRTNcTZYjpCbLslZarrZhmTjHNiGKaEcU0I4rpeqIMMT1Blr3ScrUV08wpphlRTDOimGZEMV1PlCGmJ8bLL2/Sp5+9JRf6+Rc36Q8+f7f0cXC1/dmX75c+hsn5k8e1Xv7op+/S1Rf+z+GwP3l5nV787Hrp4+Bq+9Mv36fLq+WP405evU0vX63uLwJQhpieGPsfX6cPnt2QJB+ZHz6z/SbJx+av/dPr9L9+IqafKmJ6Ynz0/Dp96zs3JEmSJCv7935NTD9lxPTEENMkSZLkwyimnzZiemKIaZIkSfJhFNNPGzE9McQ0SZIk+TCK6afNk4jp07PztLa+texhPAilz1VMkyRJkg+jmH7arGRM7x0cpbX1rRkvLq8W3v4+Y3pjczednp0X3cf2zv7c+NfWt4rvN6X55zp2vGKaJEmSfBjF9NNmZWN67+Do9ueLy6tsUN8X9xXTh8cn9zSiYcQ0SZIkuZqK6afNo4jplGYDtZnlbWZ8v/fJ929naw+PT9L2zv7CZdvLra1vpY3N3ZnHbV/Xvp/D45OZ6169frNw/JGY7s5et8fRDeTTs/Pb65tfLCwa7+Hxycx9dccjpkmSJMmHUUw/bR5tTLeDsR2Yr16/mYnd7s/d+93Y3J25rG+mtxuo7bjtIxfTewdHM6Hevb9oTC8ab3sWv3n+DWKaJEmSfBjF9NPmUcR0M5vcBHF3l+9uYHZnorsz1W26M9l9cbqxuTu3i/nQbue5Y6a7y953TLdfv+5rKaZJkiTJh1FMP21WNqa7IdrerToX0+343N7Z743jRbtzL5rp7XMophfNTHdnyrvj7RvD2Jhuz0Z3xymmSZIkyYdRTD9tVjamu7tjt8nFdPs23cs3NndnQjcyMz325Ge53bxrz0w3Y2hsI6ZJkiTJh1FMP22ebEw3s9vd++ku2w3O7Z39uWX2Do7mjpHeOzhaeBKyXEx3H2N7Z3/m/rvXb2zuLozpvvGm9PWu8d3QFtMkSZLkwyimnzZPNqYX/Tmt7tm8uzHdLJc7m3fuBGS5vzPdvrwb681u2o3tE6B1n+ui8XZPPNYgpkmSJMmHUUw/bVYypqdG7uzgd2Hv4Kh3dlxMkyRJkg+jmH7aiOkV4L5juu8kZw1imiRJknwYxfTTRkxPDDFNkiRJPoxi+mkjpieGmCZJkiQfRjH9tBHTE0NMkyRJkg+jmH7aiOmJ8fxfXqcP/9ENJ+rfJfl4/ccrMAaS5Ch//Z9epx9
+JqafKmJ6Yvzs5+/TD6/ecaJ++lnez//wffqDz69Dt+Xj8of35KefvUs/+/mv7u3+OO61fyz+6KfX6eqL90sfx1Nx2Z+9Wv7k5U360c9ulj6OVXfZn79l+9Mvf5UuP1/+OO7i5Wfv0s9eLT+axXQdxPQEWfZKy9X2+v1fpz/+s3dLHwdX11d/+lfpb2xLmPHNn1+nt9e/Wvo4uNr+xdv36f/9xc3Sx8HV9ld//Tfpj/7v6s7uPmZRhpieIMteabnaimnmFNOMKKYZUUwzopiuJ8oQ0xNk2SstV1sxzZximhHFNCOKaUYU0/VEGWJ6gix7peVqK6aZU0wzophmRDHNiGK6nihDTE+Mz17epIsXb2f8wYu36eUvbKD4lWKaOcU0I4ppRhTTjCim64kyxPTE2P/4On3w7GbGX/9n1+nTz22g+JVimjnFNCOKaUYU04wopuuJMsT0xPjo+fXcH5Pf+XUxza8V08wpphlRTDOimGZEMV1PlCGmJ4aYZk4xzZximhHFNCOKaUYU0/VEGWJ6Yohp5hTTzCmmGVFMM6KYZkQxXU+UIaYnhphmTjHNnGKaEcU0I4ppRhTT9UQZDxrTp2fnaWNz9yEfMszF5VVaW99a9jBSSrOv0+nZ+b2OS0wzp5hmTjHNiGKaEcU0I4rpeqKMb6ytb6W22zv71R4sGtMbm7sPHt25mN7e2U8P9VqJaS5TMc2cYpoRxTQjimlGFNP1RBnfODw+mblg2TF9cXmVtnf208bmbrq4vKo2lr7HzcV097WqRc0ZfDHNnGKaOcU0I4ppRhTTjCim64kyvvHq9ZvBGxwen8zMxnZvn7u+O6Obi8S9g6N0enae9g6O0t7B0cx1G5u7tzO1d328ReMtielmXA3dGC4Zd3dcpa+BmGZOMc2cYpoRxTQjimlGFNP1RBnfGJqJPjw+mQmxbiTmrt87OJqZ6Y7MuDZxeHF5NXfbjc3dmbDs3n/u8YbGWzum7zruvpgueQ3ENHOKaeYU04wophlRTDOimK4nypg7ZrodhX27Wq+tb91elru+/e+U8jHd7OLdd1/N4w1Fa+7xhsZ7l2Ommxnh6Mz0Xca9aGb6rq+BmGZOMc2cYpoRxTQjimlGFNP1RBkzZ/Nudh9ux3CfketfvX4ztwtyLqabXbzbP7d39R4KycjjDY33IXbzvsu4x8R05DUQ08wppplTTDOimGZEMc2IYrqeKGPuT2O1Y607y9ll7PW5mF4Uu31j67u/3OMNjXdZMZ0bt5lpPrRimjnFNCOKaUYU04wopuuJMnpnphv2Do7m4nfv4Oh25jN3/fbO/szMcnOW7j76jpFOaX638qGQzD3e0HhLYrr7uN0/7VUy7rExnXsNxDRzimnmFNOMKKYZUUwzopiuJ8qYO2a6S/fs17mzY3evb1/XF7MN3QhsaO/qnQvJvsfrnmBt0XhLYrrZvbqxe6Kzu4z7rjGdew3ENHOKaeYU04wophlRTDOimK4nypjbzfspsXdw9GB/G3pV6b4GYpo5xTRzimlGFNOMKKYZUUzXE2U8qZjuzjoP/dmvp0ruNRDTzCmmmVNMM6KYZkQxzYhiup4o40nFNPKIaeYU08wpphlRTDOimGZEMV1PlCGmJ4aYZk4xzZximhHFNCOKaUYU0/VEGWJ6Yohp5hTTzCmmGVFMM6KYZkQxXU+UIaYnxvOPr9OHz25m3Ptn1+nypzZQ/EoxzZximhHFNCOKaUYU0/VEGWJ6Ylx98T5dvHg34w9+8i69/D/LX5m5Gopp5hTTjCimGVFMM6KYrifKENMTZNkrLVdbMc2cYpoRxTQjimlGFNP1RBlieoIse6XlaiummVNMM6KYZkQxzYhiup4oQ0xPkGWvtFxtxTRzimlGFNOMKKYZUUzXE2WI6Qmy7JWWq62YZk4xzYhimhHFNCOK6XqiDDE9MT57eZMuXrxNFy/eph/8+G364pfLX4m5Wopp5hTTjCimGVFMM6KYrifKENMTY//j6/TBs5v
0wbOb9NHzm/STlzZMnFVMM6eYZkQxzYhimhHFdD1RhpieGB89v07f+s5N+tZ3btI//Ogm/VhMs6OYZk4xzYhimhHFNCOK6XqiDDE9McQ0c4pp5hTTjCimGVFMM6KYrifKENMTQ0wzp5hmTjHNiGKaEcU0I4rpeqIMMT0xxDRzimnmFNOMKKYZUUwzopiuJ8qYRExvbO6m07PzZQ8jzOnZedrY3L3999r61r3dt5hmTjHNnGKaEcU0I4ppRhTT9UQZKx/TG5u7aW19a8ZXr9+Mvo+hmI7E9vbO/tw4tnf2R40jipjmMhXTzCmmGVFMM6KYZkQxXU+U8Shiuh26TVyOmWm+r5g+PD4JP2YJ7Zi+b8Q0c4pp5hTTjCimGVFMM6KYrifKeHQxnVL/bO3h8cnC2euNzd256y8ur1JKKe0dHIVmm4diui/42zHcXD80u96d+W6Wv7i8mnmuJfeVkphmXjHNnGKaEcU0I4ppRhTT9UQZjzKmU0ozQXx4fDITjH0x272+L1CHKI3p9uPtHRzNRHv35/byfTF91/tKSUwzr5hmTjHNiGKaEcU0I4rpeqKMRxvT7cs3Nndvw7qhHdt995G7vkvfMdPNjHB0ZnrR9e2xdK9fNDN9l/tKSUwzr5hmTjHNiGKaEcU0I4rpeqKMRxvT7WjsRm53V+77iunS3bz7rn/1+s3crtp3jencfaUkpplXTDOnmGZEMc2IYpoRxXQ9UcajjOnubtrd2djIfaxKTPeN38w0l6mYZk4xzYhimhHFNCOK6XqijEcX001cti/bOziaO/v13sHRwt2wu7ff3tlPewdHg+MYiunu8t1jtHMB3F1+e2f/zjE9dF8piWnmFdPMKaYZUUwzophmRDFdT5TxKGI68nemu2fr7jsBWN91KX0drHc9m3eze3Vj94RouQBOaXZX9Xbsj43pvvtqPycxzZximjnFNCOKaUYU04wopuuJMlY+plHG3sHRzC8BxDRzimnmFNOMKKYZUUwzopiuJ8oQ00+M7gx9d6ZdTDOnmGZOMc2IYpoRxTQjiul6ogwxPTHENHOKaeYU04wophlRTDOimK4nyhDTE0NMM6eYZk4xzYhimhHFNCOK6XqiDDE9McQ0c4pp5hTTjCimGVFMM6KYrifKENMT4/nH1+nDZzfpw2c36Teei2nOK6aZU0wzophmRDHNiGK6nihDTE+Mqy/ep4sX79LFi3fpBz9+l778pQ0TZxXTzCmmGVFMM6KYZkQxXU+UIaYnyLJXWq62Ypo5xTQjimlGFNOMKKbriTLE9ARZ9krL1VZMM6eYZkQxzYhimhHFdD1RhpieIMteabnaimnmFNOMKKYZUUwzopiuJ8oQ0xNk2SstV1sxzZximhHFNCOKaUYU0/VEGWJ6Ynz28iZdvHg7449+ZuPErxXTzCmmGVFMM6KYZkQxXU+UIaYnxv7H1+mDZzcznv2Xt+kXf7L8lZmroZhmTjHNiGKaEcU0I4rpeqIMMT0xPnp+nb71nZsZv/eJmObXimnmFNOMKKYZUUwzopiuJ8oQ0xNDTDOnmGZOMc2IYpoRxTQjiul6ogwxPTHENHOKaeYU04wophlRTDOimK4nyhDTE0NMM6eYZk4xzYhimhHFNCOK6XqiDDG9BE7PztPa+tZSHltMM6eYZk4xzYhimhHFNCOK6XqiDDEdZG19a8btnf0739dDxPTG5m7a2Nydu1xMM6eYZk4xzYhimhHFNCOK6XqiDDEdYGNzNx0en8xcVhLTtbm4vErbO/tpY3M3XVxezVwnpplTTDOnmGZEMc2IYpoRxXQ9UYaYzvDq9Zu0tr6VXr1+M3i7w+OTmZnr9u3X1rduZ6PX1rfS9z75/tzM9NDy7WXX1rd6Z5zb7B0cpdOz87R3cJT2Do5mrhPTzCmmmVNMM6KYZkQxzYhiup4oQ0wH2NjcHZyJPjw+mQnc07PzmZ+7AXxxeTUT00PLd2+b0lez4kNx38T4xeXVXHiLaeYU08wpphl
RTDOimGZEMV1PlCGmg3SPmT49O7+9rm936rX1rdvL2v9OaT6Qh5Zvbtu9fhHNLt5940hJTDOvmGZOMc2IYpoRxTQjiul6ogwxfQea3a7bsdxnNKZzy3d3Ae/uut2m2cW7/XP79mKaOcU0c4ppRhTTjCimGVFM1xNliOk7srG5exutuZnjSExHZ56bY7jbwdx9rD4bxDRzimnmFNOMKKYZUUwzopiuJ8oQ0xm6u02nNP+nrfYOjuaOTd47OLo9rjkX00PLn56dz51JfFF89x0j3b29mGZOMc2cYpoRxTQjimlGFNP1RBliOsD2zv7Cmd6G7q7Y3ROQDcX00PLNTPSi47W74+zbBby9q7eYZk4xzZximhHFNCOKaUYU0/VEGWJ6Yohp5hTTzCmmGVFMM6KYZkQxXU+UIaYnhphmTjHNnGKaEcU0I4ppRhTT9UQZYnpiiGnmFNPMKaYZUUwzophmRDFdT5QhpieGmGZOMc2cYpoRxTQjimlGFNP1RBliemKIaeYU08wpphlRTDOimGZEMV1PlCGmJ8bzj6/Th89uZjz7r2KaXyummVNMM6KYZkQxzYhiup4oQ0xPjKsv3qeLF+9m/NHPbJz4tWKaOcU0I4ppRhTTjCim64kyxPQEWfZKy9VWTDOnmGZEMc2IYpoRxXQ9UYaYniDLXmm52opp5hTTjCimGVFMM6KYrifKENMTZNkrLVdbMc2cYpoRxTQjimlGFNP1RBlieoIse6XlaiummVNMM6KYZkQxzYhiup4oQ0xPjM9e3qSLF2/r+uO/Sl/+cvkbB95NMc2cYpoRxTQjimlGFNP1RBliemLsf3ydPnh2U9WPDm7SZ1/Y4D1WxTRzimlGFNOMKKYZUUzXE2WI6Ynx0fPr9K3v3FT1H/7GTfqJmH60imnmFNOMKKYZUUwzopiuJ8oQ0xNDTDOnmGZOMc2IYpoRxTQjiul6ogwxPTHENHOKaeYU04wopsof1LoAABcPSURBVBlRTDOimK4nyhDTE0NMM6eYZk4xzYhimhHFNCOK6XqiDDG9BE7PztPa+tZSHltMM6eYZk4xzYhimhHFNCOK6XqiDDEdZG19a8btnf0731fNmN7Y3J0baxsxzZximjnFNCOKaUYU04wopuuJMsR0gI3N3XR4fDJzWUlM12Rjczednp3f/rx3cDQzVjHNnGKaOcU0I4ppRhTTjCim64kyxHSGV6/fpLX1rfTq9ZvB2x0en8zMBrdvv7a+dTsbvba+lb73yffnZoyHlm8vu7a+lTY2dxeOoxvTp2fnM7cX08wppplTTDOimGZEMc2IYrqeKENMB9jY3B2ciT48PpkJ1m7AdgP44vJqJqaHlu/eNqWvZsUXxX03prd39mdm1cU0c4pp5hTTjCimGVFMM6KYrifKENNBuscht4N1Y3M3XVxezd2+uaz975TmA3lo+ea23esX0XfMtJjmGMU0c4ppRhTTjCimGVFM1xNliOk70Ox23Y7lPqMxnVu+uwv43sHRwrF1Z6ab+28uE9PMKaaZU0wzophmRDHNiGK6nihDTN+RdrTmZo4jMR2deW6O4e4Gc9+4Gtq7eotp5hTTzCmmGVFMM6KYZkQxXU+UIaYzXFxezR0v3f3TVnsHR3MnBds7OLo9rjkX00PLn56dz51JfCi+uzHdjW8xzZximjnFNCOKaUYU04wopuuJMsR0gO2d/cG/3ZzS/K7Y3ROQDcX00PJNDC86XrtL3zHT7duLaeYU08wpphlRTDOimGZEMV1PlCGmJ4aYZk4xzZximhHFNCOKaUYU0/VEGWJ6Yohp5hTTzCmmGVFMM6KYZkQxXU+UIaYnhphmTjHNnGKaEcU0I4ppRhTT9UQZYnpiiGnmFNPMKaYZUUwzophmRDFdT5QhpieGmGZOMc2cYpoRxTQjimlGFNP1RBliemI8//g6ffjspqq/cSCmH7NimjnFNCOKaUYU04wopuuJMsT0xLj64n26ePGuqj/48bv0xS//cukbB95
NMc2cYpoRxTQjimlGFNP1RBlieoIse6XlaiummVNMM6KYZkQxzYhiup4oQ0xPkGWvtFxtxTRzimlGFNOMKKYZUUzXE2WI6Qmy7JWWq62YZk4xzYhimhHFNCOK6XqiDDE9QZa90nK1FdPMKaYZUUwzophmRDFdT5QhpifGZy9v0sWLtw/mp5/b8D02xTRzimlGFNOMKKYZUUzXE2WI6Ymx//F1+uDZzYP527/3Nv2fP1n+hoJxxTRzimlGFNOMKKYZUUzXE2WI6Ynx0fPr9K3v3DyY/+E/vRPTj0wxzZximhHFNCOKaUYU0/VEGWJ6Yohp5hTTzCmmGVFMM6KYZkQxXU+UIaYnhphmTjHNnGKaEcU0I4ppRhTT9UQZYnpiiGnmFNPMKaYZUUwzophmRDFdT5Qhph+Y07PztLa+tbTHF9PMKaaZU0wzophmRDHNiGK6nihDTAdYW9+acXtn/8739RAxvbG5OzfmBjHNnGKaOcU0I4ppRhTTjCim64kyxHSGjc3ddHh8MnNZSUzX5NXrN2ltfWtuvIfHJ7eXiWnmFNPMKaYZUUwzophmRDFdT5Qhpgdo4vTV6zeDtzs8PpmZBW7ffm1963Y2em19K33vk+/PzUwPLd9edm19K21s7i4cx/bOfto7OBocq5hmTjHNnGKaEcU0I4ppRhTT9UQZYjrDxubu4Ez04fHJTOCenp3P/NwN4IvLq5mYHlq+e9uUvgrmRXG/tr6VLi6vBp+PmGZOMc2cYpoRxTQjimlGFNP1RBliOkD3+OPTs/Pb6zY2d+cCth213cDtBvLQ8s1tc4GcUnwWXUwzp5hmTjHNiGKaEcU0I4rpeqIMMT2SZrfrdiz3GY3p3PLdXcCHduM2M837UEwzp5hmRDHNiGKaEcV0PVGGmL4DG5u7t7PTuYCNxHRk5jmlr2ef2zPjbRwzzftQTDOnmGZEMc2IYpoRxXQ9UYaYHuDi8mrueOnun7baOziaOynY3sHR7e7WuZgeWv707HzuzNxD8d3cd3eZ9v2IaeYU08wpphlRTDOimGZEMV1PlCGmM2zv7C/8m80N3V2xuycgG4rpoeWbmehFx2svortMezximjnFNHOKaUYU04wophlRTNcTZYjpiSGmmVNMM6eYZkQxzYhimhHFdD1RhpieGGKaOcU0c4ppRhTTjCimGVFM1xNliOmJIaaZU0wzp5hmRDHNiGKaEcV0PVGGmJ4YYpo5xTRzimlGFNOMKKYZUUzXE2WI6YkhpplTTDOnmGZEMc2IYpoRxXQ9UYaYnhjPP75OHz67eTB/+/euxfQjU0wzp5hmRDHNiGKaEcV0PVGGmJ4YV1+8Txcv3j2Yn37+Nv1CTD8qxTRzimlGFNOMKKYZUUzXE2WI6Qmy7JWWq62YZk4xzYhimhHFNCOK6XqiDDE9QZa90nK1FdPMKaYZUUwzophmRDFdT5QhpifIsldarrZimjnFNCOKaUYU04wopuuJMsT0BFn2SsvVVkwzp5hmRDHNiGKaEcV0PVGGmJ4YL7+8SZ9+9nZl/dFPnbBs2Ypp5hTTjCimGVFMM6KYrifKENMTY//j6/TBs5uV9T/+7nX6xZ/+5dI3LFNWTDOnmGZEMc2IYpoRxXQ9UYaYnhgfPb9O3/rOzcr6739bTC9bMc2cYpoRxTQjimlGFNP1RBliemKIaeYU08wpphlRTDOimGZEMV1PlCGmJ4aYZk4xzZximhHFNCOKaUYU0/VEGWJ6Yohp5hTTzCmmGVFMM6KYZkQxXU+UIaYfmNOz87S2vrW0xxfTzCmmmVNMM6KYZkQxzYhiup4oQ0wHWFvfmnF7Z//O91Uzpjc2d+fG+ur1m5nbiGnmFNPMKaYZUUwzophmRDFdT5QhpjNsbO6mw+OTmctKYromG5u76fTs/PbnJtzbl4lp5hTTzCmmGVFMM6KYZkQxXU+UIaYHePX6Te/sbpfD45OFs8FNzDbXfe+T78/NTA8t3152bX0rbWzuLhxHN6bbyze
IaeYU08wpphlRTDOimGZEMV1PlCGmM2xs7g7ORB8en8wE7unZ+czP3QC+uLyaiduh5bu3TemrWfFFcd8X080YLi6vUkpimnnFNHOKaUYU04wophlRTNcTZYjpAN3jkNvBurG5exuq7ds3l7X/ndJ8IA8t39y2e/0iFsV0+3IxzZximjnFNCOKaUYU04wopuuJMsT0SJrdptux3Gc0pnPLd3cB3zs4Wjg2M9O8D8U0c4ppRhTTjCimGVFM1xNliOk70I7W3MxxJKajM8/NMdx9wdwdV4NjpjlWMc2cYpoRxTQjimlGFNP1RBlieoCLy6u546W7cbp3cDR3UrC9g6Pb45pzMT20/OnZ+dyZxIfiuxvTzWM5mzfHKKaZU0wzophmRDHNiGK6nihDTGfY3tmf2wW7S3dX7O4JyIZiemj5ZiZ60fHaXfydad6HYpo5xTQjimlGFNOMKKbriTLE9MQQ08wppplTTDOimGZEMc2IYrqeKENMTwwxzZximjnFNCOKaUYU04wopuuJMsT0xBDTzCmmmVNMM6KYZkQxzYhiup4oQ0xPDDHNnGKaOcU0I4ppRhTTjCim64kyxPTEENPMKaaZU0wzophmRDHNiGK6nihDTE+M5x9fpw+f3aysx797k8T0chXTzCmmGVFMM6KYZkQxXU+UIaYnxsufv0+fXr1bWf/gp8vfqExdMc2cYpoRxTQjimlGFNP1RBlieoIse6XlaiummVNMM6KYZkQxzYhiup4oQ0xPkGWvtFxtxTRzimlGFNOMKKYZUUzXE2WI6Qmy7JWWq62YZk4xzYhimhHFNCOK6XqiDDE9QZa90nK1FdPMKaYZUUwzophmRDFdT5QhpifGyy9v0h/+0tmyuVgxzZximhHFNCOKaUYU0/VEGWJ6Yvyb37pOP/lDGyMuVkwzp5hmRDHNiGKaEcV0PVGGmJ4Yv/nvxTSHFdPMKaYZUUwzophmRDFdT5QhpieGmGZOMc2cYpoRxTQjimlGFNP1RBliemKIaeYU08wpphlRTDOimGZEMV1PlCGmJ4aYZk4xzZximhHFNCOKaUYU0/VEGWJ6CZyenae19a2lPLaYZk4xzZximhHFNCOKaUYU0/VEGU8upvcOjtLa+taMF5dXxffbvc/tnf0739dDxPTG5m7a2Nydu1xMM6eYZk4xzYhimhHFNCOK6XqijCcZ03sHR7c/X1xeFQf1xuZuOjw+mbmsJKZrc3F5lbZ39tPG5u7c8xbTzCmmmVNMM6KYZkQxzYhiup4o48nHdEpfhW8Tw2vrW7czw2vrW+nV6zcppZQOj09mZp6by1+9fjPz8yIWLd/3mN/75PtzM9NDy7eXXVvf6p1x7r4Gp2fnva+FmGZOMc2cYpoRxTQjimlGFNP1RBmTjOlujB4en8xcdnp2PvPzxubu4Ex0bvnuYzaz5ZHlu7dtns9Q3DcxfnF5NfdcxTRzimnmFNOMKKYZUUwzopiuJ8p48jHdzOo28dm3y3ff7tDd23WPmT49Ow8v372vbiAPLT92N/VmF+9Fz0NMM6eYZk4xzYhimhHFNCOK6XqijCcZ093w7e5y3ReufS4K2CbQ27E8tHwupnPLd3cB7868d59/O/S7v1wQ08wppplTTDOimGZEMc2IYrqeKONJxvRQbC6K6bEnKNvY3L2N1tzykZiOPn5zDHc7mLuP1WeDmGZOMc2cYpoRxTQjimlGFNP1RBli+m+X6R5bvHdwdHvccfd46e6fthpavu8xuzE9tPzp2fncmcQXxXffMdLd24tp5hTTzCmmGVFMM6KYZkQxXU+UIab/lu6u1O0o3d7ZXzjTG1k+F9NDyzcz0YuO126zvbPf+9zbr4mYZk4xzZximhHFNCOKaUYU0/VEGU8upjGMmGZOMc2cYpoRxTQjimlGFNP1RBliemKIaeYU08wpphlRTDOimGZEMV1PlCGmJ4aYZk4xzZximhHFNCOKaUYU0/VEGWJ6Yohp5hTTzCmmGVFMM6KYZkQ
xXU+UIaYnhphmTjHNnGKaEcU0I4ppRhTT9UQZYnpi/NvfEtMcVkwzp5hmRDHNiGKaEcV0PVGGmJ4YL//3+/SHf7T8FZerq5hmTjHNiGKaEcU0I4rpeqIMMT1Blr3ScrUV08wpphlRTDOimGZEMV1PlCGmJ8iyV1qutmKaOcU0I4ppRhTTjCim64kyxPQEWfZKy9VWTDOnmGZEMc2IYpoRxXQ9UYaYniDLXmm52opp5hTTjCimGVFMM6KYrifKENMT4+WXN+mzL22MuFgxzZximhHFNCOKaUYU0/VEGWJ6Yvzzf3Wd/tsPbIy4WDHNnGKaEcU0I4ppRhTT9UQZYnpi/JN/cZ3+64WNERcrpplTTDOimGZEMc2IYrqeKENMTwwxzZximjnFNCOKaUYU04wopuuJMsT0xBDTzCmmmVNMM6KYZkQxzYhiup4oQ0xPDDHNnGKaOcU0I4ppRhTTjCim64kyxPQT5/TsPG1s7t7+LKaZU0wzp5hmRDHNiGKaEcV0PVGGmF4Ca+tbM27v7Fd7LDHNsYpp5hTTjCimGVFMM6KYrifKENMPzMbmbjo8Ppm5TExzlRTTzCmmGVFMM6KYZkQxXU+UIaYfkFev36S19a306vWbwdsdHp/MzFx3b5+7fntnf+Z6Mc0ximnmFNOMKKYZUUwzopiuJ8oQ0w/Mxubu4Ez04fHJTPx2Z5Zz1+8dHM3cv5lpjlVMM6eYZkQxzYhimhHFdD1RhpheAt1jpk/Pzm+v29jcTReXV3O3by7LXd/+d0pimuMV08wpphlRTDOimGZEMV1PlCGml8zp2flcDPcZub5vN3IxzbGKaeYU04wophlRTDOimK4nyhDTK8DG5u7t7HR3ZrnL2OvFNMcqpplTTDOimGZEMc2IYrqeKENMPyAXl1dzx0s3M9MNewdHM/HbXNbMNueu397ZT3sHR7fXbe/si2mOUkwzp5hmRDHNiGKaEcV0PVGGmH5gumfabod0Q/ds3d14zl3fvq4b32KaOcU0c4ppRhTTjCimGVFM1xNliOmJIaaZU0wzp5hmRDHNiGKaEcV0PVGGmJ4YYpo5xTRzimlGFNOMKKYZUUzXE2WI6YkhpplTTDOnmGZEMc2IYpoRxXQ9UYaYnhhimjnFNHOKaUYU04wophlRTNcTZYjpiSGmmVNMM6eYZkQxzYhimhHFdD1RhpieGP/iX1+n//YDGyMuVkwzp5hmRDHNiGKaEcV0PVGGmJ4YL3/+Pn3+c6HExYpp5hTTjCimGVFMM6KYrifKENMTZNkrLVdbMc2cYpoRxTQjimlGFNP1RBlieoIse6XlaiummVNMM6KYZkQxzYhiup4oQ0wDAAAAADASMQ0AAAAAwEjENAAAAAAAIxHTAAAAAACMRExPhO2d/bS2vpXW1rfS9s7+soeDFeDw+OT2M9G2YWNzd+F1eNqcnp2njc3d3uuGtiU+M9Ni0efEtgUppbR3cDTzHvd997A9Qe5zYnuCVUdMT4C9g6OZjdP2zn7aOzha4oiwChwenwz+YmVjczednp3f/tz9HOHpcXF5dftlpC+SctsSn5lpkPuc2LYgpTT32djY3E2Hxye3P9ueIKX858T2BKuOmJ4AG5u76eLy6vbni8urhbNOmA5j/4Mamq3E02LRe53blvjMTIuhmWnbFnTpfi5sT9BH93Nie4JVR0w/cV69fpPW1rfSq9dvBi/D9OjuOtX32+H2f1DbO/szvy3G06Xvy0hkW+IzMy2iu3nbtiCl2ffZ9gSL6L7PtidYdcT0E0dMI8r2zv7crEH3OCT/QU2Dkpj2mZkO0Rkg2xY0QdRge4I+up+TPmxPsGqI6SeOmEaU5jjIhu5ve1NKaW19a+4yPD3ua2Y6JZ+Zp0w0pm1bps3p2Xnoe4jtybTp+5z0YXuCVUNMTwDHTCNC5D8ou09Ng/s6Zjoln5mnzH3GtM/J02RoptH2BA2RGekG2xOsGmJ6AjibN/roO+5o6EyqzayB3/Y+fRZF0ti
z7/rMPG2GfunS/dnnZHp0d8ftYnuClPKfE9sTrDpieiL4O9Po0v5MrK1vzf2Cpe84JP85PW3af/Jo0bFnY/8urM/M0yP3ObFtQRM0fbZno21Ppk3kc2J7glVHTAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAjEdMAAAAAAIxETAMAAAAAMBIxDQAAAADASMQ0AAAAAAAj+f8zBnreEa4rgQAAAABJRU5ErkJggg==", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(y = top10_inv_type_amt.index[::-1],\n", + " x= top10_inv_type_amt['sum'][::-1],\n", + " orientation='h'),\n", + " layout_title_text = 'Top 10 Investment types against their amount')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "\n", + "\n", + "* Private equity funding seems to be the one with high number of deals and the highest sum value of 26.7B raised as well\n", + "* Though seed funding has 1388 funding deals, the sum of money raised is just about 500M since they happen during the very early stages of a startup.\n", + "\n", + "Now let us see what is the average value raised by the startups in each of these funding rounds." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "orientation": "h", + "type": "bar", + "x": [ + 13200000, + 5910285.714285715, + 123483250, + 74622742.42857143, + 47459786.75, + 9236363.636363637, + 6288348.083333333, + 2051781.9090909092, + 24932712.840485074, + 1038448.3373493976 + ], + "y": [ + "Seed", + "PreSeries A", + "Series D", + "Series C", + "Series B", + "Series A", + "Debt Funding", + "Seed Angel Funding", + "Private Equity", + "Seed Funding" + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + 
"startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": 
"heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { 
+ "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + 
"#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": 
{ + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 10 Investment types against their avg. amount" + }, + "xaxis": { + "autorange": true, + "range": [ + 0, + 129982368.42105263 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + -0.5, + 9.5 + ], + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(y = top10_inv_type_amt.index[::-1],\n", + " x= top10_inv_type_amt['mean'][::-1],\n", + " orientation='h'),\n", + " layout_title_text = 'Top 10 Investment types against their avg. amount')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "We can see a clear increase in the mean funding value as we go up the funding round ladder from Seed funding to Series D as expected." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task4: Location" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find out about the major startup hubs in India.\n", + "\n", + "Now let us explore the location of the startups that got funded. This can help us to understand the startup hubs of India.\n", + "\n", + "Since there are multiple locations in the data, let us plot the top 10 locations. We will also club New Delhi, Gurgaon & Noida together to form NCR for the below chart." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "102" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.City.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "NCR 892\n", + "Bangalore 842\n", + "Mumbai 568\n", + "Pune 105\n", + "Hyderabad 99\n", + "Chennai 97\n", + "Ahmedabad 38\n", + "Jaipur 30\n", + "Kolkata 21\n", + "Indore 13\n", + "Name: City, dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top10_cities = df.City.value_counts()[:10]\n", + "top10_cities" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "orientation": "h", + "type": "bar", + "x": [ + 13, + 21, + 30, + 38, + 97, + 99, + 105, + 568, + 842, + 892 + ], + "y": [ + "Indore", + "Kolkata", + "Jaipur", + "Ahmedabad", + "Chennai", + "Hyderabad", + "Pune", + "Mumbai", + "Bangalore", + "NCR" + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + 
"choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, 
+ "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + 
"scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 
0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": 
"#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 10 cities wrt to funding deals" + }, + "xaxis": { + "autorange": true, + "range": [ + 0, + 938.9473684210526 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + -0.5, + 9.5 + ], + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(y= top10_cities.index[::-1],\n", + " x= top10_cities.values[::-1],\n", + " orientation = 'h'),\n", + " layout_title_text='Top 10 cities wrt to funding deals')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "\n", + "\n", + "* NCR & Bangalore are almost equal to each other with respect to number of funding deals followed by Mumbai in third place.\n", + " \n", + "* Chennai, Hyderabad & Pune are the next set of cities that are catching up.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sizesummean
City
NCR8928.461949e+091.492407e+07
Bangalore8421.462637e+102.508811e+07
Mumbai5684.940535e+091.228989e+07
Pune1056.330820e+088.916648e+06
Hyderabad994.010762e+085.570503e+06
Chennai977.187670e+089.583560e+06
Ahmedabad381.136360e+084.208741e+06
Jaipur301.527350e+081.090964e+07
Kolkata211.598300e+071.598300e+06
Indore134.672000e+069.344000e+05
\n", + "
" + ], + "text/plain": [ + " size sum mean\n", + "City \n", + "NCR 892 8.461949e+09 1.492407e+07\n", + "Bangalore 842 1.462637e+10 2.508811e+07\n", + "Mumbai 568 4.940535e+09 1.228989e+07\n", + "Pune 105 6.330820e+08 8.916648e+06\n", + "Hyderabad 99 4.010762e+08 5.570503e+06\n", + "Chennai 97 7.187670e+08 9.583560e+06\n", + "Ahmedabad 38 1.136360e+08 4.208741e+06\n", + "Jaipur 30 1.527350e+08 1.090964e+07\n", + "Kolkata 21 1.598300e+07 1.598300e+06\n", + "Indore 13 4.672000e+06 9.344000e+05" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top10_cities_amt = df.groupby('City').CleanedAmount.agg(['size','sum','mean']).sort_values('size',ascending=False)[:10]\n", + "top10_cities_amt" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "orientation": "h", + "type": "bar", + "x": [ + 4672000, + 15983000, + 113636000, + 152735000, + 401076230, + 633082000, + 718767000, + 4940535015, + 8461949172.54, + 14626366863 + ], + "y": [ + "Indore", + "Kolkata", + "Ahmedabad", + "Jaipur", + "Hyderabad", + "Pune", + "Chennai", + "Mumbai", + "NCR", + "Bangalore" + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": 
"#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + 
"colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + 
"ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + 
], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } 
+ }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 10 cities wrt to funding deals against the total amount invested" + }, + "xaxis": { + "autorange": true, + "range": [ + 0, + 15396175645.263159 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + -0.5, + 9.5 + ], + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(y= top10_cities_amt.sort_values('sum',ascending=True).index,\n", + " x= top10_cities_amt.sort_values('sum',ascending=True)['sum'],\n", + " orientation = 'h'),\n", + " layout_title_text='Top 10 cities wrt to funding deals against the total amount invested')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "* Though NCR tops the number of funding deals when it comes to the total funding value by location, Bangalore leads the way by a huge margin.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "orientation": "h", + "type": "bar", + "x": [ + 934400, + 1598300, + 4208740.740740741, + 5570503.194444444, + 8916647.887323944, + 9583560, + 10909642.857142856, + 12289888.097014925, + 14924072.614708995, + 25088107.826758146 + ], + "y": [ + "Indore", + "Kolkata", + "Ahmedabad", + "Hyderabad", + "Pune", + "Chennai", + "Jaipur", + "Mumbai", + "NCR", + "Bangalore" + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + 
"colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ 
+ 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + 
"marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + 
"#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + 
"ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 10 cities wrt to funding deals against the avg. amount invested" + }, + "xaxis": { + "autorange": true, + "range": [ + 0, + 26408534.55448226 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + -0.5, + 9.5 + ], + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(y= top10_cities_amt.sort_values('mean',ascending=True).index,\n", + " x= top10_cities_amt.sort_values('mean',ascending=True)['mean'],\n", + " orientation = 'h'),\n", + " layout_title_text='Top 10 cities wrt to funding deals against the avg. amount invested')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "\n", + "* Bangalore tops the list here again\n", + "* Jaipur took the fourth spot with respect to mean funding value\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Cityyearsizemean
112NCR20163247.845871e+06
10Bangalore20162836.202401e+06
111NCR20152541.716481e+07
11Bangalore20172264.606586e+07
9Bangalore20152002.150281e+07
\n", + "
" + ], + "text/plain": [ + " City year size mean\n", + "112 NCR 2016 324 7.845871e+06\n", + "10 Bangalore 2016 283 6.202401e+06\n", + "111 NCR 2015 254 1.716481e+07\n", + "11 Bangalore 2017 226 4.606586e+07\n", + "9 Bangalore 2015 200 2.150281e+07" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df=(df.groupby(['City','year'])\n", + " .CleanedAmount.agg(['size','mean'])\n", + " .reset_index()\n", + " .sort_values('size',ascending=False)\n", + ")\n", + "temp_df = temp_df.loc[temp_df.City.isin([\"Bangalore\", \"NCR\", \"Mumbai\", \"Chennai\", \"Pune\", \"Hyderabad\", \"Jaipur\"])]\n", + "temp_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "City=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "NCR", + "marker": { + "color": "#636efa", + "size": [ + 324, + 254, + 197, + 81, + 33, + 3 + ], + "sizemode": "area", + "sizeref": 0.81, + "symbol": "circle" + }, + "mode": "markers", + "name": "NCR", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2015, + 2017, + 2018, + 2019, + 2020 + ], + "xaxis": "x", + "y": [ + "NCR", + "NCR", + "NCR", + "NCR", + "NCR", + "NCR" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Bangalore", + "marker": { + "color": "#EF553B", + "size": [ + 283, + 226, + 200, + 100, + 31, + 2 + ], + "sizemode": "area", + "sizeref": 0.81, + "symbol": "circle" + }, + "mode": "markers", + "name": "Bangalore", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2015, + 2018, + 2019, + 2020 + ], + "xaxis": "x", + "y": [ + "Bangalore", + "Bangalore", + "Bangalore", + "Bangalore", + "Bangalore", + "Bangalore" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Mumbai", + "marker": { + "color": "#00cc96", + "size": [ + 187, + 166, + 141, + 61, + 12, + 1 + ], + "sizemode": "area", + "sizeref": 0.81, + "symbol": "circle" + }, + "mode": "markers", + "name": "Mumbai", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2015, + 2017, + 2018, + 2019, + 2020 + ], + "xaxis": "x", + "y": [ + "Mumbai", + "Mumbai", + "Mumbai", + "Mumbai", + "Mumbai", + "Mumbai" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Pune", + "marker": { + "color": "#ab63fa", + "size": [ + 43, + 26, + 22, + 11, + 3 + ], + "sizemode": "area", + "sizeref": 0.81, + "symbol": "circle" + }, + "mode": "markers", + "name": "Pune", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2015, + 2018, + 2019 + ], + "xaxis": "x", + "y": [ + "Pune", + "Pune", + "Pune", + "Pune", + "Pune" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Hyderabad", + "marker": { + "color": "#FFA15A", + "size": [ + 33, + 29, + 24, + 10, + 3 + ], + "sizemode": "area", + "sizeref": 0.81, + "symbol": "circle" + }, + "mode": "markers", + "name": "Hyderabad", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2017, + 2016, + 2015, + 2018, + 2019 + ], + "xaxis": "x", + "y": [ + "Hyderabad", + "Hyderabad", + "Hyderabad", + "Hyderabad", + "Hyderabad" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Chennai", + "marker": { + "color": "#19d3f3", + "size": [ + 31, + 24, + 24, + 16, + 1, + 1 + ], + "sizemode": "area", + "sizeref": 0.81, + "symbol": "circle" + }, + "mode": "markers", + "name": "Chennai", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2015, + 2017, + 2018, + 2020, + 2019 + ], + "xaxis": "x", + "y": [ + "Chennai", + "Chennai", + "Chennai", + "Chennai", + "Chennai", + "Chennai" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Jaipur", + "marker": { + "color": "#FF6692", + "size": [ + 15, + 6, + 5, + 3, + 1 + ], + "sizemode": "area", + "sizeref": 0.81, + "symbol": "circle" + }, + "mode": "markers", + "name": "Jaipur", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2015, + 2017, + 2018, + 2019 + ], + "xaxis": "x", + "y": [ + "Jaipur", + "Jaipur", + "Jaipur", + "Jaipur", + "Jaipur" + ], + "yaxis": "y" + } + ], + "layout": { + "autosize": true, + "legend": { + "itemsizing": "constant", + "title": { + "text": "City" + }, + "tracegroupgap": 0 + }, + "showlegend": false, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + 
"outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 
0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 
0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + 
}, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + 
"zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Number of funding deals by location over time" + }, + "xaxis": { + "anchor": "y", + "autorange": true, + "domain": [ + 0, + 1 + ], + "range": [ + 2014.6157988520492, + 2020.3058679178685 + ], + "title": { + "text": "year" + }, + "type": "linear" + }, + "yaxis": { + "anchor": "x", + "autorange": true, + "categoryarray": [ + "Jaipur", + "Chennai", + "Hyderabad", + "Pune", + "Mumbai", + "Bangalore", + "NCR" + ], + "categoryorder": "array", + "domain": [ + 0, + 1 + ], + "range": [ + -0.453165830883048, + 6.785875360298153 + ], + "title": { + "text": "City" + }, + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.scatter(temp_df, \n", + " x='year', \n", + " y='City', \n", + " color='City', \n", + " size='size',\n", + " title='Number of funding deals by location over time')\n", + "fig.update_layout(showlegend=False)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "City=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "NCR", + "marker": { + "color": "#636efa", + "size": [ + 7845871.052631579, + 17164814.814814813, + 10882127.192982456, + 15527416.614285715, + 60785843.41214286, + 53682798 + ], + "sizemode": "area", + "sizeref": 275000, + "symbol": "circle" + }, + "mode": "markers", + "name": "NCR", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2015, + 2017, + 2018, + 2019, + 2020 + ], + "xaxis": "x", + "y": [ + "NCR", + "NCR", + "NCR", + "NCR", + "NCR", + "NCR" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Bangalore", + "marker": { + "color": "#EF553B", + "size": [ + 6202400.654545454, + 46065863.35403727, + 21502811.5942029, + 17621756.761363637, + 49996734.48275862, + 109179430 + ], + "sizemode": "area", + "sizeref": 275000, + "symbol": "circle" + }, + "mode": "markers", + "name": "Bangalore", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2015, + 2018, + 2019, + 2020 + ], + "xaxis": "x", + "y": [ + "Bangalore", + "Bangalore", + "Bangalore", + "Bangalore", + "Bangalore", + "Bangalore" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Mumbai", + "marker": { + "color": "#00cc96", + "size": [ + 7872235.042735043, + 8438302.521008404, + 11732474.747474747, + 27643448.14814815, + 29938692.916666668, + 1800000 + ], + "sizemode": "area", + "sizeref": 275000, + "symbol": "circle" + }, + "mode": "markers", + "name": "Mumbai", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2015, + 2017, + 2018, + 2019, + 2020 + ], + "xaxis": "x", + "y": [ + "Mumbai", + "Mumbai", + "Mumbai", + "Mumbai", + "Mumbai", + "Mumbai" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Pune", + "marker": { + "color": "#ab63fa", + "size": [ + 4171769.230769231, + 14153352.94117647, + 3203529.411764706, + 27218625, + 3933333.3333333335 + ], + "sizemode": "area", + "sizeref": 275000, + "symbol": "circle" + }, + "mode": "markers", + "name": "Pune", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2015, + 2018, + 2019 + ], + "xaxis": "x", + "y": [ + "Pune", + "Pune", + "Pune", + "Pune", + "Pune" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Hyderabad", + "marker": { + "color": "#FFA15A", + "size": [ + 8889618.695652174, + 2644761.904761905, + 1868055.5555555555, + 3778571.4285714286, + 27000000 + ], + "sizemode": "area", + "sizeref": 275000, + "symbol": "circle" + }, + "mode": "markers", + "name": "Hyderabad", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2017, + 2016, + 2015, + 2018, + 2019 + ], + "xaxis": "x", + "y": [ + "Hyderabad", + "Hyderabad", + "Hyderabad", + "Hyderabad", + "Hyderabad" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Chennai", + "marker": { + "color": "#19d3f3", + "size": [ + 5307045.454545454, + 16126666.666666666, + 4943952.380952381, + 16152600, + 9000000, + 5000000 + ], + "sizemode": "area", + "sizeref": 275000, + "symbol": "circle" + }, + "mode": "markers", + "name": "Chennai", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2015, + 2017, + 2018, + 2020, + 2019 + ], + "xaxis": "x", + "y": [ + "Chennai", + "Chennai", + "Chennai", + "Chennai", + "Chennai", + "Chennai" + ], + "yaxis": "y" + }, + { + "hovertemplate": "City=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Jaipur", + "marker": { + "color": "#FF6692", + "size": [ + 837500, + 6380000, + 310000, + 2391666.6666666665, + 110000000 + ], + "sizemode": "area", + "sizeref": 275000, + "symbol": "circle" + }, + "mode": "markers", + "name": "Jaipur", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2015, + 2017, + 2018, + 2019 + ], + "xaxis": "x", + "y": [ + "Jaipur", + "Jaipur", + "Jaipur", + "Jaipur", + "Jaipur" + ], + "yaxis": "y" + } + ], + "layout": { + "autosize": true, + "legend": { + "itemsizing": "constant", + "title": { + "text": "City" + }, + "tracegroupgap": 0 + }, + "showlegend": false, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + 
"contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 
0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 
0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 
1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + 
"standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Mean funding value by location over time" + }, + "xaxis": { + "anchor": "y", + "autorange": true, + "domain": [ + 0, + 1 + ], + "range": [ + 2014.6629551299177, + 2020.3999550853237 + ], + "title": { + "text": "year" + }, + "type": "linear" + }, + "yaxis": { + "anchor": "x", + "autorange": true, + "categoryarray": [ + "Jaipur", + "Chennai", + "Hyderabad", + "Pune", + "Mumbai", + "Bangalore", + "NCR" + ], + "categoryorder": "array", + "domain": [ + 0, + 1 + ], + "range": [ + -0.8163409295350208, + 6.703331926360543 + ], + "title": { + "text": "City" + }, + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.scatter(temp_df, \n", + " x='year', \n", + " y='City', \n", + " color='City', \n", + " size='mean',\n", + " title='Mean funding value by location over time')\n", + "fig.update_layout(showlegend=False)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Task6: Industry Vertical\n", + "\n", + "Let us now have a look at the industry verticals and the number of funding deals for each vertical.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Can we get an overview of the Industry verticals and the number of funding deals?" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "819" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.IndustryVertical.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Consumer Internet 941\n", + "Technology 478\n", + "E-Commerce 276\n", + "Healthcare 70\n", + "Finance 62\n", + "Logistics 32\n", + "Education 24\n", + "Food & Beverage 23\n", + "Ed-Tech 14\n", + "E-commerce 12\n", + "Name: IndustryVertical, dtype: int64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top10_industries = df.IndustryVertical.value_counts()[:10]\n", + "top10_industries" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "orientation": "h", + "type": "bar", + "x": [ + 12, + 14, + 23, + 24, + 32, + 62, + 70, + 276, + 478, + 941 + ], + "y": [ + "E-commerce", + "Ed-Tech", + "Food & Beverage", + "Education", + "Logistics", + "Finance", 
+ "Healthcare", + "E-Commerce", + "Technology", + "Consumer Internet" + ] + } + ], + "layout": { + "autosize": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 
0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + 
"ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + 
"header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", 
+ "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 10 industry verticals wrt to funding deals" + }, + "xaxis": { + "autorange": true, + "range": [ + 0, + 990.5263157894736 + ], + "type": "linear" + }, + "yaxis": { + "autorange": true, + "range": [ + -0.5, + 9.5 + ], + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = go.Figure(data = go.Bar(y= top10_industries.index[::-1],\n", + " x= top10_industries.values[::-1],\n", + " orientation = 'h'),\n", + " layout_title_text='Top 10 industry verticals wrt to funding deals')\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "\n", + "\n", + "Consumer Internet is the most preferred industry segment for funding followed by Technology and E-commerce.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IndustryVerticalyearsizemean
115Consumer Internet20165396.291066e+06
116Consumer Internet20173091.272341e+07
776Technology20172237.064185e+06
775Technology20161905.687608e+06
176E-Commerce20161639.614323e+06
\n", + "
" + ], + "text/plain": [ + " IndustryVertical year size mean\n", + "115 Consumer Internet 2016 539 6.291066e+06\n", + "116 Consumer Internet 2017 309 1.272341e+07\n", + "776 Technology 2017 223 7.064185e+06\n", + "775 Technology 2016 190 5.687608e+06\n", + "176 E-Commerce 2016 163 9.614323e+06" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_df=(df.groupby(['IndustryVertical','year'])\n", + " .CleanedAmount.agg(['size','mean'])\n", + " .reset_index()\n", + " .sort_values('size',ascending=False)\n", + ")\n", + "temp_df = temp_df.loc[temp_df.IndustryVertical.isin(top10_industries.index)]\n", + "temp_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Consumer Internet", + "marker": { + "color": "#636efa", + "size": [ + 539, + 309, + 93 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "Consumer Internet", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2018 + ], + "xaxis": "x", + "y": [ + "Consumer Internet", + "Consumer Internet", + "Consumer Internet" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Technology", + "marker": { + "color": "#EF553B", + "size": [ + 223, + 190, + 62, + 3 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "Technology", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2017, + 2016, + 2018, + 2019 + ], + "xaxis": "x", + "y": [ + "Technology", + "Technology", + "Technology", + "Technology" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "E-Commerce", + "marker": { + "color": "#00cc96", + "size": [ + 163, + 94, + 11, + 7, + 1 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "E-Commerce", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2019, + 2018, + 2015 + ], + "xaxis": "x", + "y": [ + "E-Commerce", + "E-Commerce", + "E-Commerce", + "E-Commerce", + "E-Commerce" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Finance", + "marker": { + "color": "#ab63fa", + "size": [ + 37, + 10, + 8, + 7 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "Finance", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2018, + 2017, + 2019, + 2016 + ], + "xaxis": "x", + "y": [ + "Finance", + "Finance", + "Finance", + "Finance" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Healthcare", + "marker": { + "color": "#FFA15A", + "size": [ + 33, + 22, + 14, + 1 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "Healthcare", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2018, + 2016, + 2017, + 2019 + ], + "xaxis": "x", + "y": [ + "Healthcare", + "Healthcare", + "Healthcare", + "Healthcare" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Education", + "marker": { + "color": "#19d3f3", + "size": [ + 19, + 2, + 2, + 1 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "Education", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2019, + 2018, + 2017 + ], + "xaxis": "x", + "y": [ + "Education", + "Education", + "Education", + "Education" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Logistics", + "marker": { + "color": "#FF6692", + "size": [ + 16, + 14, + 1, + 1 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "Logistics", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2018, + 2020 + ], + "xaxis": "x", + "y": [ + "Logistics", + "Logistics", + "Logistics", + "Logistics" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Food & Beverage", + "marker": { + "color": "#B6E880", + "size": [ + 15, + 8 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "Food & Beverage", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017 + ], + "xaxis": "x", + "y": [ + "Food & Beverage", + "Food & Beverage" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "Ed-Tech", + "marker": { + "color": "#FF97FF", + "size": [ + 12, + 2 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "Ed-Tech", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2018, + 2015 + ], + "xaxis": "x", + "y": [ + "Ed-Tech", + "Ed-Tech" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
size=%{marker.size}", + "legendgroup": "E-commerce", + "marker": { + "color": "#FECB52", + "size": [ + 7, + 4, + 1 + ], + "sizemode": "area", + "sizeref": 1.3475, + "symbol": "circle" + }, + "mode": "markers", + "name": "E-commerce", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2019, + 2018, + 2020 + ], + "xaxis": "x", + "y": [ + "E-commerce", + "E-commerce", + "E-commerce" + ], + "yaxis": "y" + } + ], + "layout": { + "autosize": true, + "legend": { + "itemsizing": "constant", + "title": { + "text": "IndustryVertical" + }, + "tracegroupgap": 0 + }, + "showlegend": false, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 
0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 
0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 
0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + 
}, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + 
"zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Number of funding deals by industry over time" + }, + "xaxis": { + "anchor": "y", + "autorange": true, + "domain": [ + 0, + 1 + ], + "range": [ + 2014.6974758723088, + 2020.3025241276912 + ], + "title": { + "text": "year" + }, + "type": "linear" + }, + "yaxis": { + "anchor": "x", + "autorange": true, + "categoryarray": [ + "E-commerce", + "Ed-Tech", + "Food & Beverage", + "Logistics", + "Education", + "Healthcare", + "Finance", + "E-Commerce", + "Technology", + "Consumer Internet" + ], + "categoryorder": "array", + "domain": [ + 0, + 1 + ], + "range": [ + -0.6623886863587624, + 10.176698908602804 + ], + "title": { + "text": "IndustryVertical" + }, + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.scatter(temp_df, \n", + " x='year', \n", + " y='IndustryVertical', \n", + " color='IndustryVertical', \n", + " size='size',\n", + " title='Number of funding deals by industry over time')\n", + "fig.update_layout(showlegend=False)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "Earlier Consumer Internet and Technology startups saw many deals, however in the recent past Finance and Healthcare are growing" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Consumer Internet", + "marker": { + "color": "#636efa", + "size": [ + 6291065.573770491, + 12723411.764705881, + 21734165.5625 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "Consumer Internet", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2018 + ], + "xaxis": "x", + "y": [ + "Consumer Internet", + "Consumer Internet", + "Consumer Internet" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Technology", + "marker": { + "color": "#EF553B", + "size": [ + 7064185.107913669, + 5687608.333333333, + 11172358.333333334, + 9666666.666666666 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "Technology", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2017, + 2016, + 2018, + 2019 + ], + "xaxis": "x", + "y": [ + "Technology", + "Technology", + "Technology", + "Technology" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "E-Commerce", + "marker": { + "color": "#00cc96", + "size": [ + 9614322.851485148, + 84792914.28571428, + 81171815.95818181, + 13333348.166666666, + 10000000 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "E-Commerce", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2019, + 2018, + 2015 + ], + "xaxis": "x", + "y": [ + "E-Commerce", + "E-Commerce", + "E-Commerce", + "E-Commerce", + "E-Commerce" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Finance", + "marker": { + "color": "#ab63fa", + "size": [ + 31963764.70588235, + 61007777.777777776, + 37987500, + 5283333.333333333 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "Finance", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2018, + 2017, + 2019, + 2016 + ], + "xaxis": "x", + "y": [ + "Finance", + "Finance", + "Finance", + "Finance" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Healthcare", + "marker": { + "color": "#FFA15A", + "size": [ + 9359000, + 1711111.111111111, + 12893666.666666666, + 15800000 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "Healthcare", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2018, + 2016, + 2017, + 2019 + ], + "xaxis": "x", + "y": [ + "Healthcare", + "Healthcare", + "Healthcare", + "Healthcare" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Education", + "marker": { + "color": "#19d3f3", + "size": [ + 5897500, + 100750000, + 4439000, + 8200000 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "Education", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2019, + 2018, + 2017 + ], + "xaxis": "x", + "y": [ + "Education", + "Education", + "Education", + "Education" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Logistics", + "marker": { + "color": "#FF6692", + "size": [ + 3814500, + 16790909.09090909, + 11000000, + 9000000 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "Logistics", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017, + 2018, + 2020 + ], + "xaxis": "x", + "y": [ + "Logistics", + "Logistics", + "Logistics", + "Logistics" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Food & Beverage", + "marker": { + "color": "#B6E880", + "size": [ + 2637333.3333333335, + 3083333.3333333335 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "Food & Beverage", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2016, + 2017 + ], + "xaxis": "x", + "y": [ + "Food & Beverage", + "Food & Beverage" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "Ed-Tech", + "marker": { + "color": "#FF97FF", + "size": [ + 3022603.6363636362, + 300000 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "Ed-Tech", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2018, + 2015 + ], + "xaxis": "x", + "y": [ + "Ed-Tech", + "Ed-Tech" + ], + "yaxis": "y" + }, + { + "hovertemplate": "IndustryVertical=%{y}
year=%{x}
mean=%{marker.size}", + "legendgroup": "E-commerce", + "marker": { + "color": "#FECB52", + "size": [ + 6874832, + 10920000, + 18358860 + ], + "sizemode": "area", + "sizeref": 251875, + "symbol": "circle" + }, + "mode": "markers", + "name": "E-commerce", + "orientation": "h", + "showlegend": true, + "type": "scatter", + "x": [ + 2019, + 2018, + 2020 + ], + "xaxis": "x", + "y": [ + "E-commerce", + "E-commerce", + "E-commerce" + ], + "yaxis": "y" + } + ], + "layout": { + "autosize": true, + "legend": { + "itemsizing": "constant", + "title": { + "text": "IndustryVertical" + }, + "tracegroupgap": 0 + }, + "showlegend": false, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { 
+ "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ 
+ 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 
0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + 
}, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + 
"zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Mean funding value by industry over time" + }, + "xaxis": { + "anchor": "y", + "autorange": true, + "domain": [ + 0, + 1 + ], + "range": [ + 2014.6800439653498, + 2020.3331950815077 + ], + "title": { + "text": "year" + }, + "type": "linear" + }, + "yaxis": { + "anchor": "x", + "autorange": true, + "categoryarray": [ + "E-commerce", + "Ed-Tech", + "Food & Beverage", + "Logistics", + "Education", + "Healthcare", + "Finance", + "E-Commerce", + "Technology", + "Consumer Internet" + ], + "categoryorder": "array", + "domain": [ + 0, + 1 + ], + "range": [ + -0.7961552955917495, + 9.819521105889217 + ], + "title": { + "text": "IndustryVertical" + }, + "type": "category" + } + } + }, + "image/png": "", + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.scatter(temp_df, \n", + " x='year', \n", + " y='IndustryVertical', \n", + " color='IndustryVertical', \n", + " size='mean',\n", + " title='Mean funding value by industry over time')\n", + "fig.update_layout(showlegend=False)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insight:\n", + "E-commerce is by far the most popular investment industry, while Education, Finance and Healthcare are picking up." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Do it yourself\n", + "\n", + "* Can we get information about the investors?\n", + "* Can we get information about the subvertical?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}