diff --git a/_drafts/2020-03-30-us-inflection.ipynb b/_drafts/2020-03-30-us-inflection.ipynb deleted file mode 100755 index 8334398ef..000000000 --- a/_drafts/2020-03-30-us-inflection.ipynb +++ /dev/null @@ -1,1661 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.028764, - "end_time": "2020-04-28T12:14:41.479261", - "exception": false, - "start_time": "2020-04-28T12:14:41.450497", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Where is the curve flattening?\n", - "> Inflection-sensitive chart for detecting successful interventions, from the article \"How To Tell If We're Beating COVID-19\".\n", - "\n", - "- author: Daniel Cox, Martin Boehler\n", - "- categories: [compare, europe, growth, interactive, plotly, states, US, usa]\n", - "- image: images/where-are-we-winning.png\n", - "- permalink: /us-inflection/\n", - "- toc: true" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T15:07:27.881339Z", - "start_time": "2020-04-03T15:07:27.347336Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:41.531320Z", - "iopub.status.busy": "2020-04-28T12:14:41.530650Z", - "iopub.status.idle": "2020-04-28T12:14:42.665588Z", - "shell.execute_reply": "2020-04-28T12:14:42.664451Z" - }, - "papermill": { - "duration": 1.163868, - "end_time": "2020-04-28T12:14:42.665738", - "exception": false, - "start_time": "2020-04-28T12:14:41.501870", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "%matplotlib inline\n", - "import sys\n", - "import math\n", - "import requests\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import ipywidgets as widgets\n", - "from ipywidgets import interact\n", - "from datetime import datetime\n", - "from IPython.display import HTML\n", - "# import plotly.io as pio\n", - "# # pio.renderers.default = 
'notebook_connected'\n", - "# pio.renderers.default = 'notebook+'\n", - "\n", - "import plotly.express as px\n", - "import plotly.graph_objects as go" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T15:07:28.057019Z", - "start_time": "2020-04-03T15:07:27.882628Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:42.721412Z", - "iopub.status.busy": "2020-04-28T12:14:42.720796Z", - "iopub.status.idle": "2020-04-28T12:14:43.576687Z", - "shell.execute_reply": "2020-04-28T12:14:43.576083Z" - }, - "papermill": { - "duration": 0.888904, - "end_time": "2020-04-28T12:14:43.576824", - "exception": false, - "start_time": "2020-04-28T12:14:42.687920", - "status": "completed" - }, - "scrolled": true, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "states_url = \"https://covidtracking.com/api/states/daily\"\n", - "case_threshold = 10000 # TODO I don't want to drop states below 100\n", - "\n", - "r = requests.get(states_url)\n", - "states_df = pd.DataFrame(r.json())\n", - "states_df['date'] = pd.to_datetime(states_df.date, format=\"%Y%m%d\")\n", - "states_df = states_df[['date', 'state', 'positive']].sort_values('date')\n", - "states_df = states_df.rename(columns={'positive': 'confirmed'})\n", - "cols = {}\n", - "for state in states_df.state.unique():\n", - " cases = states_df[(states_df.state == state) & (states_df.confirmed > case_threshold)]\n", - " cases = cases.reset_index().confirmed.reset_index(drop=True)\n", - " if len(cases) > 1:\n", - " cols[state] = cases\n", - "\n", - "df = states_df.reset_index()\n", - "# df" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T15:07:28.077150Z", - "start_time": "2020-04-03T15:07:28.058428Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:43.631696Z", - "iopub.status.busy": "2020-04-28T12:14:43.631073Z", - "iopub.status.idle": 
"2020-04-28T12:14:43.669769Z", - "shell.execute_reply": "2020-04-28T12:14:43.668934Z" - }, - "papermill": { - "duration": 0.07062, - "end_time": "2020-04-28T12:14:43.669952", - "exception": false, - "start_time": "2020-04-28T12:14:43.599332", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# hide\n", - "df = (df.assign(daily_new=df.groupby('state', as_index=False)[['confirmed']]\n", - " .diff().fillna(0)\n", - " .reset_index(0, drop=True)))" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T15:07:28.121412Z", - "start_time": "2020-04-03T15:07:28.078199Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:43.733982Z", - "iopub.status.busy": "2020-04-28T12:14:43.725252Z", - "iopub.status.idle": "2020-04-28T12:14:43.815117Z", - "shell.execute_reply": "2020-04-28T12:14:43.815599Z" - }, - "papermill": { - "duration": 0.119693, - "end_time": "2020-04-28T12:14:43.815737", - "exception": false, - "start_time": "2020-04-28T12:14:43.696044", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "df = (df.assign(avg_daily_new=df.groupby('state', as_index=False)[['daily_new']]\n", - " .rolling(7).mean()\n", - " .reset_index(0, drop=True)))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T15:07:28.126655Z", - "start_time": "2020-04-03T15:07:28.122313Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:43.867950Z", - "iopub.status.busy": "2020-04-28T12:14:43.867316Z", - "iopub.status.idle": "2020-04-28T12:14:43.870356Z", - "shell.execute_reply": "2020-04-28T12:14:43.869848Z" - }, - "papermill": { - "duration": 0.033092, - "end_time": "2020-04-28T12:14:43.870459", - "exception": false, - "start_time": "2020-04-28T12:14:43.837367", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "state_names = 
{\n", - " \"AL\": \"Alabama\",\n", - " \"AK\": \"Alaska\",\n", - " \"AS\": \"American Samoa\",\n", - " \"AZ\": \"Arizona\",\n", - " \"AR\": \"Arkansas\",\n", - " \"CA\": \"California\",\n", - " \"CO\": \"Colorado\",\n", - " \"CT\": \"Connecticut\",\n", - " \"DE\": \"Delaware\",\n", - " \"DC\": \"District Of Columbia\",\n", - " \"FM\": \"Federated States Of Micronesia\",\n", - " \"FL\": \"Florida\",\n", - " \"GA\": \"Georgia\",\n", - " \"GU\": \"Guam\",\n", - " \"HI\": \"Hawaii\",\n", - " \"ID\": \"Idaho\",\n", - " \"IL\": \"Illinois\",\n", - " \"IN\": \"Indiana\",\n", - " \"IA\": \"Iowa\",\n", - " \"KS\": \"Kansas\",\n", - " \"KY\": \"Kentucky\",\n", - " \"LA\": \"Louisiana\",\n", - " \"ME\": \"Maine\",\n", - " \"MH\": \"Marshall Islands\",\n", - " \"MD\": \"Maryland\",\n", - " \"MA\": \"Massachusetts\",\n", - " \"MI\": \"Michigan\",\n", - " \"MN\": \"Minnesota\",\n", - " \"MS\": \"Mississippi\",\n", - " \"MO\": \"Missouri\",\n", - " \"MT\": \"Montana\",\n", - " \"NE\": \"Nebraska\",\n", - " \"NV\": \"Nevada\",\n", - " \"NH\": \"New Hampshire\",\n", - " \"NJ\": \"New Jersey\",\n", - " \"NM\": \"New Mexico\",\n", - " \"NY\": \"New York\",\n", - " \"NC\": \"North Carolina\",\n", - " \"ND\": \"North Dakota\",\n", - " \"MP\": \"Northern Mariana Islands\",\n", - " \"OH\": \"Ohio\",\n", - " \"OK\": \"Oklahoma\",\n", - " \"OR\": \"Oregon\",\n", - " \"PW\": \"Palau\",\n", - " \"PA\": \"Pennsylvania\",\n", - " \"PR\": \"Puerto Rico\",\n", - " \"RI\": \"Rhode Island\",\n", - " \"SC\": \"South Carolina\",\n", - " \"SD\": \"South Dakota\",\n", - " \"TN\": \"Tennessee\",\n", - " \"TX\": \"Texas\",\n", - " \"UT\": \"Utah\",\n", - " \"VT\": \"Vermont\",\n", - " \"VI\": \"Virgin Islands\",\n", - " \"VA\": \"Virginia\",\n", - " \"WA\": \"Washington\",\n", - " \"WV\": \"West Virginia\",\n", - " \"WI\": \"Wisconsin\",\n", - " \"WY\": \"Wyoming\"\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": 
"2020-04-03T15:07:28.135732Z", - "start_time": "2020-04-03T15:07:28.127440Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:43.933849Z", - "iopub.status.busy": "2020-04-28T12:14:43.922674Z", - "iopub.status.idle": "2020-04-28T12:14:43.941237Z", - "shell.execute_reply": "2020-04-28T12:14:43.940699Z" - }, - "papermill": { - "duration": 0.049349, - "end_time": "2020-04-28T12:14:43.941340", - "exception": false, - "start_time": "2020-04-28T12:14:43.891991", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "df['day'] = df.date.apply(lambda x: x.date()).apply(str)\n", - "df = df.sort_values(by='day')\n", - "dfc = df[df.avg_daily_new > 0]" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T15:07:28.138712Z", - "start_time": "2020-04-03T15:07:28.136560Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:43.990559Z", - "iopub.status.busy": "2020-04-28T12:14:43.989323Z", - "iopub.status.idle": "2020-04-28T12:14:43.992981Z", - "shell.execute_reply": "2020-04-28T12:14:43.993464Z" - }, - "papermill": { - "duration": 0.030511, - "end_time": "2020-04-28T12:14:43.993587", - "exception": false, - "start_time": "2020-04-28T12:14:43.963076", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "days = dfc.day.unique().tolist()\n", - "states = dfc.state.unique().tolist()\n", - "states.sort()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T15:07:28.146823Z", - "start_time": "2020-04-03T15:07:28.139972Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:44.051031Z", - "iopub.status.busy": "2020-04-28T12:14:44.049268Z", - "iopub.status.idle": "2020-04-28T12:14:44.053442Z", - "shell.execute_reply": "2020-04-28T12:14:44.053926Z" - }, - "papermill": { - "duration": 0.03894, - "end_time": "2020-04-28T12:14:44.054047", - 
"exception": false, - "start_time": "2020-04-28T12:14:44.015107", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "# make figure\n", - "fig_dict = {\n", - " \"data\": [],\n", - " \"layout\": {},\n", - " \"frames\": []\n", - "}\n", - "\n", - "# fill in most of layout\n", - "fig_dict[\"layout\"][\"height\"] = 700\n", - "fig_dict[\"layout\"][\"width\"] = 900\n", - "fig_dict[\"layout\"][\"xaxis\"] = {\"range\": [np.log10(5), np.log10(dfc['confirmed'].max() + 5000)], \"title\": \"Total Confirmed Cases (log scale)\", \"type\": \"log\"}\n", - "fig_dict[\"layout\"][\"yaxis\"] = {\"range\": [np.log10(1), np.log10(dfc['avg_daily_new'].max() + 500)], \"title\": \"Average Daily New Cases (log scale)\", \"type\": \"log\"}\n", - "fig_dict[\"layout\"][\"hovermode\"] = \"closest\"\n", - "fig_dict[\"layout\"][\"sliders\"] = {\n", - " \"args\": [\n", - " \"transition\", {\n", - " \"duration\": 100,\n", - " \"easing\": \"cubic-in-out\"\n", - " }\n", - " ],\n", - " \"initialValue\": min(days),\n", - " \"plotlycommand\": \"animate\",\n", - " \"values\": days,\n", - " \"visible\": True\n", - "}\n", - "\n", - "# buttons\n", - "fig_dict[\"layout\"][\"updatemenus\"] = [\n", - " {\n", - " \"buttons\": [\n", - " {\n", - " \"args\": [None, {\"frame\": {\"duration\": 300, \"redraw\": True},\n", - " \"fromcurrent\": True, \"transition\": {\"duration\": 300,\n", - " \"easing\": \"linear\"}}],\n", - " \"label\": \"Play\",\n", - " \"method\": \"animate\"\n", - " },\n", - " {\n", - " \"args\": [[None], {\"frame\": {\"duration\": 0, \"redraw\": False},\n", - " \"mode\": \"immediate\",\n", - " \"transition\": {\"duration\": 0}}],\n", - " \"label\": \"Pause\",\n", - " \"method\": \"animate\"\n", - " }\n", - " ],\n", - " \"direction\": \"left\",\n", - " \"pad\": {\"r\": 10, \"t\": 87},\n", - " \"showactive\": False,\n", - " \"type\": \"buttons\",\n", - " \"x\": 0.05,\n", - " \"xanchor\": \"right\",\n", - " \"y\": 0.05,\n", - " \"yanchor\": \"top\"\n", - " 
}\n", - "]\n", - "\n", - "# sliders\n", - "sliders_dict = {\n", - " \"active\": len(days)-1,\n", - " \"yanchor\": \"top\",\n", - " \"xanchor\": \"left\",\n", - " \"currentvalue\": {\n", - " \"font\": {\"size\": 20},\n", - "# \"prefix\": \"Date: \",\n", - " \"visible\": True,\n", - " \"xanchor\": \"right\"\n", - " },\n", - " \"transition\": {\"duration\": 100},\n", - " \"pad\": {\"b\": 10, \"t\": 50},\n", - " \"len\": 0.9,\n", - " \"x\": 0.1,\n", - " \"y\": 0,\n", - " \"steps\": []\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T15:07:29.766183Z", - "start_time": "2020-04-03T15:07:28.147867Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:44.115239Z", - "iopub.status.busy": "2020-04-28T12:14:44.111217Z", - "iopub.status.idle": "2020-04-28T12:14:54.659765Z", - "shell.execute_reply": "2020-04-28T12:14:54.658505Z" - }, - "papermill": { - "duration": 10.584356, - "end_time": "2020-04-28T12:14:54.659903", - "exception": false, - "start_time": "2020-04-28T12:14:44.075547", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "# make data\n", - "day = max(days)\n", - "for state in states:\n", - " dataset_by_day = dfc[dfc[\"day\"] <= day]\n", - " dataset_by_day_and_state = dataset_by_day[ dataset_by_day[\"state\"]==state ]\n", - " \n", - " data_dict = {\n", - " \"x\": list(dataset_by_day_and_state[\"confirmed\"]),\n", - " \"y\": list(dataset_by_day_and_state[\"avg_daily_new\"]),\n", - " \"mode\": \"lines\",\n", - " \"text\": dataset_by_day_and_state[['confirmed', 'avg_daily_new']],\n", - " \"name\": state,\n", - " 'hoverlabel': {'namelength': 0},\n", - " 'hovertemplate': '%{hovertext}
Confirmed: %{x:,d}
Average Daily: %{y:,.2f}',\n", - " 'hovertext': dataset_by_day_and_state['state'].apply(lambda s: state_names.get(s, '??') + f' ({s})')\n", - " }\n", - " fig_dict[\"data\"].append(data_dict)\n", - "\n", - "# make frames\n", - "for day in days:\n", - " frame = {\"data\": [], \"name\": day}\n", - " for state in states:\n", - " dataset_by_day = dfc[dfc[\"day\"] <= day]\n", - " dataset_by_day_and_state = dataset_by_day[\n", - " dataset_by_day[\"state\"] == state]\n", - "\n", - " data_dict = {\n", - " \"x\": list(dataset_by_day_and_state[\"confirmed\"]),\n", - " \"y\": list(dataset_by_day_and_state[\"avg_daily_new\"]),\n", - " \"mode\": \"lines\",\n", - " \"text\": dataset_by_day_and_state[['confirmed', 'avg_daily_new']],\n", - " \"name\": state\n", - " }\n", - " frame[\"data\"].append(data_dict)\n", - "\n", - " fig_dict[\"frames\"].append(frame)\n", - " slider_step = {\"args\": [\n", - " [day],\n", - " {\"frame\": {\"duration\": 100, \"redraw\": True},\n", - " \"mode\": \"immediate\",\n", - " \"transition\": {\"duration\": 100, 'easing': 'linear'}}\n", - " ],\n", - " \"label\": day,\n", - " \"method\": \"animate\"}\n", - " sliders_dict[\"steps\"].append(slider_step)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-04-03T15:07:33.332997Z", - "start_time": "2020-04-03T15:07:29.767288Z" - }, - "execution": { - "iopub.execute_input": "2020-04-28T12:14:54.710718Z", - "iopub.status.busy": "2020-04-28T12:14:54.710124Z", - "iopub.status.idle": "2020-04-28T12:15:15.015688Z", - "shell.execute_reply": "2020-04-28T12:15:15.016502Z" - }, - "papermill": { - "duration": 20.334241, - "end_time": "2020-04-28T12:15:15.016723", - "exception": false, - "start_time": "2020-04-28T12:14:54.682482", - "status": "completed" - }, - "scrolled": false, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "fig_dict[\"layout\"][\"sliders\"] = [sliders_dict]\n", - "#fig = go.Figure(fig_dict)" - ] - }, - { - "cell_type": 
"markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2020-03-31T03:24:03.114334Z", - "start_time": "2020-03-31T03:24:03.109579Z" - }, - "papermill": { - "duration": 0.02207, - "end_time": "2020-04-28T12:15:15.063057", - "exception": false, - "start_time": "2020-04-28T12:15:15.040987", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "States/countries will drift off the diagonal when they are flattening the curve.\n", - "\n", - "\n", - "> Tip: To highlight states/countries click (Shift+ for multiple) on the name in the legend. Click outside the legend to highlight all states/countries.\n", - "\n", - "\n", - "Only entries with more than 10,000 confirmed cases are considered.\n", - "\n", - "\n", - "The top 5 entries are initially highlighted." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:15.116786Z", - "iopub.status.busy": "2020-04-28T12:15:15.116160Z", - "iopub.status.idle": "2020-04-28T12:15:15.118318Z", - "shell.execute_reply": "2020-04-28T12:15:15.117732Z" - }, - "papermill": { - "duration": 0.033589, - "end_time": "2020-04-28T12:15:15.118425", - "exception": false, - "start_time": "2020-04-28T12:15:15.084836", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "africa_names = [\n", - " 'Algeria',\n", - " 'Angola',\n", - " 'Benin',\n", - " 'Botswana',\n", - " 'Burkina Faso',\n", - " 'Burundi',\n", - " 'Cabo Verde',\n", - " 'Cameroon',\n", - " 'Central African Republic',\n", - " 'Chad',\n", - " 'Congo (Brazzaville)',\n", - " 'Congo (Kinshasa)',\n", - " 'Djibouti',\n", - " 'Egypt',\n", - " 'Equatorial Guinea',\n", - " 'Eritrea',\n", - " 'Eswatini',\n", - " 'Ethiopia',\n", - " 'Gabon',\n", - " 'Gambia',\n", - " 'Ghana',\n", - " 'Guinea',\n", - " 'Guinea-Bissau',\n", - " 'Ivory Coast',\n", - " 'Kenya',\n", - " 'Liberia',\n", - " 'Libya',\n", - " 'Madagascar',\n", - " 'Malawi',\n", - " 'Mali',\n", - " 'Mauritania',\n", 
- " 'Mauritius',\n", - " 'Morocco',\n", - " 'Mozambique',\n", - " 'Namibia',\n", - " 'Niger',\n", - " 'Nigeria',\n", - " 'Rwanda',\n", - " 'Sao Tome and Principe',\n", - " 'Senegal',\n", - " 'Seychelles',\n", - " 'Sierra Leone',\n", - " 'Somalia',\n", - " 'South Africa',\n", - " 'South Sudan',\n", - " 'Sudan',\n", - " 'Tanzania',\n", - " 'Togo',\n", - " 'Tunisia',\n", - " 'Uganda',\n", - " 'Western Sahara',\n", - " 'Zambia',\n", - " 'Zimbabwe'\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:15.168088Z", - "iopub.status.busy": "2020-04-28T12:15:15.167487Z", - "iopub.status.idle": "2020-04-28T12:15:15.170661Z", - "shell.execute_reply": "2020-04-28T12:15:15.170145Z" - }, - "papermill": { - "duration": 0.030846, - "end_time": "2020-04-28T12:15:15.170766", - "exception": false, - "start_time": "2020-04-28T12:15:15.139920", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "america_names = [\n", - " 'Antigua and Barbuda',\n", - " 'Argentina',\n", - " 'Bahamas',\n", - " 'Barbados',\n", - " 'Belize',\n", - " 'Bolivia',\n", - " 'Brazil',\n", - " 'Canada',\n", - " 'Chile',\n", - " 'Colombia',\n", - " 'Costa Rica',\n", - " 'Cuba',\n", - " 'Dominica',\n", - " 'Dominican Republic',\n", - " 'Ecuador',\n", - " 'El Salvador',\n", - " 'Grenada',\n", - " 'Guatemala',\n", - " 'Guyana',\n", - " 'Haiti',\n", - " 'Honduras',\n", - " 'Jamaica',\n", - " 'Mexico',\n", - " 'Nicaragua',\n", - " 'Panama',\n", - " 'Paraguay',\n", - " 'Peru',\n", - " 'Saint Kitts and Nevis',\n", - " 'Saint Lucia',\n", - " 'Saint Vincent and the Grenadines',\n", - " 'Suriname',\n", - " 'Trinidad and Tobago',\n", - " 'United States of America',\n", - " 'Uruguay',\n", - " 'Venezuela'\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:15.220424Z", - "iopub.status.busy": 
"2020-04-28T12:15:15.219792Z", - "iopub.status.idle": "2020-04-28T12:15:15.223798Z", - "shell.execute_reply": "2020-04-28T12:15:15.223284Z" - }, - "papermill": { - "duration": 0.031594, - "end_time": "2020-04-28T12:15:15.223900", - "exception": false, - "start_time": "2020-04-28T12:15:15.192306", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "asiapacific_names = [\n", - " 'Afghanistan',\n", - " 'Armenia',\n", - " 'Australia',\n", - " 'Azerbaijan',\n", - " 'Bahrain',\n", - " 'Bangladesh',\n", - " 'Bhutan',\n", - " 'Brunei',\n", - " 'Cambodia',\n", - " 'China',\n", - " 'Cyprus',\n", - " 'East Timor',\n", - " 'Fiji',\n", - " 'Georgia',\n", - " 'Hong Kong',\n", - " 'India',\n", - " 'Indonesia',\n", - " 'Iran',\n", - " 'Iraq',\n", - " 'Israel',\n", - " 'Japan',\n", - " 'Jordan',\n", - " 'Kazakhstan',\n", - " 'Kuwait',\n", - " 'Kyrgyzstan',\n", - " 'Laos',\n", - " 'Lebanon',\n", - " 'Malaysia',\n", - " 'Maldives',\n", - " 'Mongolia',\n", - " 'Myanmar',\n", - " 'Nepal',\n", - " 'New Zealand',\n", - " 'Oman',\n", - " 'Pakistan',\n", - " 'Papua New Guinea',\n", - " 'Philippines',\n", - " 'Qatar',\n", - " 'Russia',\n", - " 'Saudi Arabia',\n", - " 'Singapore',\n", - " 'South Korea',\n", - " 'Sri Lanka',\n", - " 'Syria',\n", - " 'Taiwan',\n", - " 'Thailand',\n", - " 'Turkey',\n", - " 'United Arab Emirates',\n", - " 'Uzbekistan',\n", - " 'Vietnam',\n", - " 'West Bank and Gaza'\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:15.276394Z", - "iopub.status.busy": "2020-04-28T12:15:15.275799Z", - "iopub.status.idle": "2020-04-28T12:15:15.277444Z", - "shell.execute_reply": "2020-04-28T12:15:15.278000Z" - }, - "papermill": { - "duration": 0.032555, - "end_time": "2020-04-28T12:15:15.278119", - "exception": false, - "start_time": "2020-04-28T12:15:15.245564", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - 
"#hide\n", - "europe_names = [\n", - " 'Albania',\n", - " 'Andorra',\n", - " 'Armenia',\n", - " 'Austria',\n", - " 'Azerbaijan',\n", - " 'Belarus',\n", - " 'Belgium',\n", - " 'Bosnia Herzegovina',\n", - " 'Bulgaria',\n", - " 'Croatia',\n", - " 'Cyprus',\n", - " 'Czechia',\n", - " 'Denmark',\n", - " 'Estonia',\n", - " 'Finland',\n", - " 'France',\n", - " 'Georgia',\n", - " 'Germany',\n", - " 'Greece',\n", - " 'Hungary',\n", - " 'Iceland',\n", - " 'Ireland',\n", - " 'Italy',\n", - " 'Kazakhstan',\n", - " 'Kosovo',\n", - " 'Latvia',\n", - " 'Liechtenstein',\n", - " 'Lithuania',\n", - " 'Luxembourg',\n", - " 'Malta',\n", - " 'Moldova',\n", - " 'Monaco',\n", - " 'Montenegro',\n", - " 'Netherlands',\n", - " 'North Macedonia',\n", - " 'Norway',\n", - " 'Poland',\n", - " 'Portugal',\n", - " 'Romania',\n", - " 'Russia',\n", - " 'San Marino',\n", - " 'Serbia',\n", - " 'Slovakia',\n", - " 'Slovenia',\n", - " 'Spain',\n", - " 'Sweden',\n", - " 'Switzerland',\n", - " 'Turkey',\n", - " 'Ukraine',\n", - " 'United Kingdom',\n", - " 'Vatican City'\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:15.338037Z", - "iopub.status.busy": "2020-04-28T12:15:15.334090Z", - "iopub.status.idle": "2020-04-28T12:15:16.066151Z", - "shell.execute_reply": "2020-04-28T12:15:16.065412Z" - }, - "papermill": { - "duration": 0.76659, - "end_time": "2020-04-28T12:15:16.066268", - "exception": false, - "start_time": "2020-04-28T12:15:15.299678", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "from load_covid_data import load_individual_timeseries\n", - "\n", - "# load data\n", - "df = load_individual_timeseries('confirmed')\n", - "df = df[~df['country'].str.contains(' \\(total\\)')].drop(['state', 'type'], axis=1).reset_index()\n", - "\n", - "# clean data\n", - "df['country'] = df['country'].replace({'Bosnia and Herzegovina':'Bosnia Herzegovina' })\n", - 
"df['country'] = df['country'].replace({'Timor-Leste' :'East Timor' })\n", - "df['country'] = df['country'].replace({\"Cote d'Ivoire\" :'Ivory Coast' })\n", - "df['country'] = df['country'].replace({'Burma' :'Myanmar' })\n", - "df['country'] = df['country'].replace({'Korea, South' :'South Korea' })\n", - "df['country'] = df['country'].replace({'Taiwan*' :'Taiwan' })\n", - "df['country'] = df['country'].replace({'US' :'United States of America'})\n", - "df['country'] = df['country'].replace({'Holy See' :'Vatican City' })\n", - "\n", - "# append usa-by-states data\n", - "dfc['country'] = dfc['state'].map(state_names)\n", - "df = (df.rename(columns={'cases': 'confirmed'})\n", - " .append(dfc.drop(['index', 'state', 'daily_new', 'avg_daily_new', 'day'], axis=1)\n", - " .astype({'confirmed': 'int64'})))\n", - "\n", - "# aggregate data\n", - "df = (df.sort_values(by=['country', 'date'])\n", - " .groupby(['country', 'date'])['confirmed']\n", - " .agg(sum)).reset_index()\n", - "\n", - "# additional measurements\n", - "df = df.assign(daily_new_abs=(df.groupby('country', as_index=False)[['confirmed']]\n", - " .diff()\n", - " .fillna(0)\n", - " .astype('int64')))\n", - "df = df.assign(daily_new_avg=(df.groupby('country', as_index=False)[['daily_new_abs']]\n", - " .rolling(7)\n", - " .mean()\n", - " .fillna(0)\n", - " .round(decimals=2)\n", - " .reset_index(drop=True)))\n", - "\n", - "# slice data\n", - "df_usa = df[(df.confirmed > case_threshold) & (df.daily_new_avg > 0) & (df['country'].isin(state_names.values()))]\n", - "df_africa = df[(df.confirmed > case_threshold) & (df.daily_new_avg > 0) & (df['country'].isin(africa_names ))]\n", - "df_america = df[(df.confirmed > case_threshold) & (df.daily_new_avg > 0) & (df['country'].isin(america_names ))]\n", - "df_asiapacific = df[(df.confirmed > case_threshold) & (df.daily_new_avg > 0) & (df['country'].isin(asiapacific_names ))]\n", - "df_europe = df[(df.confirmed > case_threshold) & (df.daily_new_avg > 0) & 
(df['country'].isin(europe_names ))]\n", - "\n", - "#df.to_csv(r'.\\data.csv')\n", - "#df" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:16.120206Z", - "iopub.status.busy": "2020-04-28T12:15:16.119587Z", - "iopub.status.idle": "2020-04-28T12:15:16.204727Z", - "shell.execute_reply": "2020-04-28T12:15:16.203844Z" - }, - "papermill": { - "duration": 0.116611, - "end_time": "2020-04-28T12:15:16.204917", - "exception": false, - "start_time": "2020-04-28T12:15:16.088306", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#hide\n", - "import altair as alt\n", - "alt.data_transformers.disable_max_rows()\n", - "\n", - "\n", - "def make_chart(data=df):\n", - "\n", - " countries = data.country.unique().tolist()\n", - "\n", - " highlighted = data.sort_values('confirmed', ascending=False).groupby('country').head(1).country.tolist()[:5]\n", - "\n", - " selection = alt.selection_multi(bind='legend',\n", - " fields=['country'],\n", - " init=[{'country': x} for x in highlighted])\n", - "\n", - " base = (alt.Chart(data=data)\n", - " .properties(width=550)\n", - " .encode(x=alt.X(scale=alt.Scale(type='log'),\n", - " shorthand='confirmed:Q',\n", - " title='Total Confirmed Cases (log scale)'),\n", - " y=alt.Y(scale=alt.Scale(type='log'),\n", - " shorthand='daily_new_avg:Q',\n", - " title='Average Daily New Cases (log scale)'),\n", - " color=alt.Color(legend=alt.Legend(columns=3,\n", - " symbolLimit=len(countries),\n", - " title='Country/State:'),\n", - " scale=alt.Scale(scheme='category20b'),\n", - " shorthand='country:N'),\n", - " tooltip=list(data),\n", - " opacity=alt.condition(selection, alt.value(1), alt.value(0.05))))\n", - "\n", - " chart = (base.mark_line()\n", - " .add_selection(selection)\n", - " .configure_legend(labelFontSize=10,\n", - " titleFontSize=12)\n", - " .configure_axis(labelFontSize=10,\n", - " 
titleFontSize=12))\n", - "\n", - " return chart" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.02266, - "end_time": "2020-04-28T12:15:16.249756", - "exception": false, - "start_time": "2020-04-28T12:15:16.227096", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## United States of America" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:16.298205Z", - "iopub.status.busy": "2020-04-28T12:15:16.297605Z", - "iopub.status.idle": "2020-04-28T12:15:16.425598Z", - "shell.execute_reply": "2020-04-28T12:15:16.426084Z" - }, - "papermill": { - "duration": 0.1547, - "end_time": "2020-04-28T12:15:16.426217", - "exception": false, - "start_time": "2020-04-28T12:15:16.271517", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#hide_input\n", - "make_chart(df_usa)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.025725, - "end_time": "2020-04-28T12:15:16.477703", - "exception": false, - "start_time": "2020-04-28T12:15:16.451978", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Africa" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:16.535947Z", - "iopub.status.busy": "2020-04-28T12:15:16.533243Z", - "iopub.status.idle": "2020-04-28T12:15:16.606634Z", - "shell.execute_reply": "2020-04-28T12:15:16.605695Z" - }, - "papermill": { - "duration": 0.103488, - "end_time": "2020-04-28T12:15:16.606760", - "exception": false, - "start_time": "2020-04-28T12:15:16.503272", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#hide_input\n", - "make_chart(df_africa)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.027604, - "end_time": "2020-04-28T12:15:16.662022", - "exception": false, - "start_time": "2020-04-28T12:15:16.634418", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## America" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:16.743561Z", - "iopub.status.busy": "2020-04-28T12:15:16.722174Z", - "iopub.status.idle": "2020-04-28T12:15:16.795063Z", - "shell.execute_reply": "2020-04-28T12:15:16.794405Z" - }, - "papermill": { - "duration": 0.106016, - "end_time": "2020-04-28T12:15:16.795183", - "exception": false, - "start_time": "2020-04-28T12:15:16.689167", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#hide_input\n", - "make_chart(df_america)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.030546, - "end_time": "2020-04-28T12:15:16.856347", - "exception": false, - "start_time": "2020-04-28T12:15:16.825801", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Asia-Pacific" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:16.920815Z", - "iopub.status.busy": "2020-04-28T12:15:16.920211Z", - "iopub.status.idle": "2020-04-28T12:15:17.017627Z", - "shell.execute_reply": "2020-04-28T12:15:17.018128Z" - }, - "papermill": { - "duration": 0.13301, - "end_time": "2020-04-28T12:15:17.018263", - "exception": false, - "start_time": "2020-04-28T12:15:16.885253", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#hide_input\n", - "make_chart(df_asiapacific)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.033726, - "end_time": "2020-04-28T12:15:17.086399", - "exception": false, - "start_time": "2020-04-28T12:15:17.052673", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Europe" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:17.159526Z", - "iopub.status.busy": "2020-04-28T12:15:17.158870Z", - "iopub.status.idle": "2020-04-28T12:15:17.263161Z", - "shell.execute_reply": "2020-04-28T12:15:17.263654Z" - }, - "papermill": { - "duration": 0.142393, - "end_time": "2020-04-28T12:15:17.263788", - "exception": false, - "start_time": "2020-04-28T12:15:17.121395", - "status": "completed" - }, - "pycharm": { - "name": "#%%\n" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#hide_input\n", - "make_chart(df_europe)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.036694, - "end_time": "2020-04-28T12:15:17.338537", - "exception": false, - "start_time": "2020-04-28T12:15:17.301843", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Explanation\n", - "\n", - "The exponential growth stage of a pandemic must end sometime, either as the virus runs out of people to infect, or as societies get it under control. However, it can be difficult to tell exactly when exponential growth is ending, for several reasons:\n", - "\n", - "* Humans aren't wired to understand exponentials at a glance.\n", - "* It can be difficult to compare regions with differing first-infection dates, testing rates, and populations.\n", - "* The news tends to report individual data points, without the contextual information necessary to interpret them.\n", - "* If the plot doesn't explicitly plot the rate of new cases, a change must be quite dramatic before it becomes distinguishable.\n", - "\n", - "This visualization plots the (sliding average of) daily new cases against the total cases, for each US state (with other countries and regions to come). This has the advantage of aligning all of them onto a baseline trajectory of exponential growth, with a very clear downward plummet when a given state gets the virus under control. As explained in the caveats below, this visualization has a very specific purpose: to make it clear whether a given state has managed to exit the exponential trajectory or not.\n", - "\n", - "_minutephysics_ has an excellent video on this visualization type, [How To Tell If We're Beating COVID-19](https://youtu.be/54XLXg4fYsc)."
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.036775, - "end_time": "2020-04-28T12:15:17.415477", - "exception": false, - "start_time": "2020-04-28T12:15:17.378702", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "## Caveats\n", - "\n", - "1. The logarithmic scales can make it seem as if states are closer together than they actually are. For example, at the time of writing (April 5th) New York (the leader in US cases) and New Jersey (the runner-up) look as though it's a close race, but New York has over three times as many cases as New Jersey.\n", - "2. The logarithmic scale can also obscure a resurgence of infections after a significant downturn, since the trace won't move much to the right during a short period late in time.\n", - "3. Time is represented by the animation, not by the x-axis, which is unusual for most charts made about COVID-19. This is the plot's main advantage, because it aligns states onto _roughly_ the same trajectory regardless of population or testing rate, but it may be unexpected.\n", - "4. The true number of cases is unknown, so the actual slope of the log-log change plot is unknown. All states are also increasing their testing rate over time, so these data may imply that the infection rate is increasing faster than it actually is.\n", - "5. The data these plots rely on are incomplete, and come in less smoothly than they may imply. Healthcare systems around the world collect and report data when they can.\n", - "6. This chart plots the logarithm of the sliding window average of the daily growth rate on the y-axis, not the raw daily growth rate, because there's too much variability day-to-day to visually detect the trend. This also makes the plot a pessimistic estimate of where each state is on its trajectory."
- ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "execution": { - "iopub.execute_input": "2020-04-28T12:15:17.579421Z", - "iopub.status.busy": "2020-04-28T12:15:17.577590Z", - "iopub.status.idle": "2020-04-28T12:15:20.956083Z", - "shell.execute_reply": "2020-04-28T12:15:20.956782Z" - }, - "papermill": { - "duration": 3.430202, - "end_time": "2020-04-28T12:15:20.956933", - "exception": false, - "start_time": "2020-04-28T12:15:17.526731", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "# This section was taken out because it was bloating the notebook size to be too large\n", - "#hide_input\n", - "# fig.show()\n", - "\n", - "# The animated visualization for the US and the descriptions were made by [Daniel Cox](https://twitter.com/danielpcox), with thanks to Henry of _minutephysics_ for [How To Tell If We're Beating COVID-19](https://youtu.be/54XLXg4fYsc), and [covidtracking.com](covidtracking.com) for US data." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 1.756859, - "end_time": "2020-04-28T12:15:29.528195", - "exception": false, - "start_time": "2020-04-28T12:15:27.771336", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "\n", - "\n", - "The static visualizations for the US, Africa, America, Asia-Pacific and Europe were made by [Martin Boehler](https://www.linkedin.com/in/martin-boehler/), with thanks to Daniel Cox for this great inspiration and implementation, and [*Johns Hopkins University CSSE*](https://systems.jhu.edu/) for the [*2019 Novel Coronavirus COVID-19 (2019-nCoV) Data Repository*](https://github.com/CSSEGISandData/COVID-19)." 
- ] - } - ], - "metadata": { - "hide_input": false, - "kernelspec": { - "display_name": "visualization-curriculum-gF8wUgMm", - "language": "python", - "name": "visualization-curriculum-gf8wugmm" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" - }, - "papermill": { - "duration": 50.80964, - "end_time": "2020-04-28T12:15:31.398022", - "environment_variables": {}, - "exception": null, - "input_path": "2020-03-30-us-inflection.ipynb", - "output_path": "2020-03-30-us-inflection.ipynb", - "parameters": {}, - "start_time": "2020-04-28T12:14:40.588382", - "version": "2.0.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}