forked from nytimes/covid-19-data
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmyanalysis_bokeh.py
119 lines (102 loc) · 4.65 KB
/
myanalysis_bokeh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# %%
import pandas as pd
import numpy as np
from datetime import datetime
from bokeh.plotting import figure
from bokeh.models import HoverTool, LinearColorMapper, CategoricalColorMapper
from bokeh.io import output_notebook, show
from bokeh.palettes import Spectral11
from bokeh.transform import factor_cmap
# Line width (in pixels) used for the plotted trend lines.
default_width = 2

# Route Bokeh output to the notebook so show() renders inline.
output_notebook()
# %% [markdown]
# **********************************************************************************************************
# # Setup
# 1. Read in the New York Times covid-19-data for states and counties (https://github.com/nytimes/covid-19-data)
# 2. Pull in a city density table
# 3. Pull in a state density table
# 4. Identify list of states, counties and cities to graph
# **********************************************************************************************************
# %%
# Case counts per state and per county, read from the local CSV files.
state_cov_data = pd.read_csv('us-states.csv')
county_cov_data = pd.read_csv('us-counties.csv')

# City density table: shorten the verbose headers, then split the combined
# "citystate" label on ', ' into separate city and state columns.
population_city_density = pd.read_csv('city_density.csv').rename(
    columns={
        'City': 'citystate',
        'Population Density (Persons/Square Mile)': 'density',
        '2016 Population': 'population',
        'Land Area (Square Miles)': 'area',
    }
)
population_city_density[['city', 'state']] = (
    population_city_density['citystate'].str.split(', ', expand=True)
)

# State density table, renamed to the same short column names as above.
population_state_density = pd.read_csv('state_density.csv').rename(
    columns={
        'State': 'state',
        'Density': 'density',
        'Pop': 'population',
        'LandArea': 'area',
    }
)
# Each entry is [state, county, [cities]]; the three lists group the
# locations of interest by region.
county_cities_east = [
    ['New York', 'New York City', ['New York']],
    ['New Jersey', 'Bergen', ['Newark', 'Jersey City']],
    ['Massachusetts', 'Suffolk', ['Boston']],
    ['South Carolina', 'Charleston', ['Charleston']],
    ['Florida', 'Miami-Dade', ['Miami']],
    ['Florida', 'Broward', ['Fort Lauderdale']],
    ['Florida', 'Duval', ['Jacksonville']],
]
county_cities_west = [
    ['Washington', 'King', ['Seattle']],
    ['Washington', 'Snohomish', ['Everett']],
    ['California', 'Los Angeles', ['Los Angeles']],
    ['California', 'San Francisco', ['San Francisco']],
    ['California', 'San Diego', ['San Diego']],
    ['Texas', 'Harris', ['Houston']],
    ['Texas', 'Bexar', ['San Antonio']],
    ['Texas', 'Dallas', ['Dallas']],
    ['Texas', 'Travis', ['Austin']],
    ['Arizona', 'Maricopa', ['Phoenix']],
]
county_cities_midwest = [
    ['Illinois', 'Cook', ['Chicago']],
    ['Louisiana', 'Orleans', ['New Orleans']],
    ['Ohio', 'Cuyahoga', ['Cleveland']],
    ['Michigan', 'Wayne', ['Detroit']],
    ['Indiana', 'Hamilton', ['Carmel']],
    ['Pennsylvania', 'Philadelphia', ['Philadelphia']],
]

# Tabular view of each region, all sharing the same three columns.
_region_columns = ['state', 'county', 'cities']
county_cities_east_map = pd.DataFrame(county_cities_east, columns=_region_columns)
county_cities_west_map = pd.DataFrame(county_cities_west, columns=_region_columns)
county_cities_midwest_map = pd.DataFrame(county_cities_midwest, columns=_region_columns)

# Distinct states per region, in first-appearance order.
states_east = county_cities_east_map['state'].unique()
states_west = county_cities_west_map['state'].unique()
states_midwest = county_cities_midwest_map['state'].unique()

# Every state of interest across all regions, de-duplicated and sorted.
states = np.unique(np.concatenate([states_east, states_midwest, states_west]))
# %% [markdown]
# **********************************************************************************************************
# # New cases per day
# Each trend line is a 7-day moving average of daily new cases, one line per state.
# **********************************************************************************************************
# %%
def movingaverage(values, window):
    """Return the simple moving average of *values* over *window* samples.

    Only full windows are averaged, so the result has
    ``len(values) - window + 1`` entries.

    Args:
        values: 1-D sequence of numbers.
        window: Number of consecutive samples averaged per output point.

    Returns:
        A float NumPy array of averages. It is empty when *values* holds
        fewer than *window* samples.

    Raises:
        ValueError: If *window* is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    samples = np.asarray(values, dtype=float)
    # np.convolve silently swaps its arguments when the kernel is longer than
    # the data, which would return a meaningless array instead of "no full
    # window available"; it also rejects empty input outright.
    if samples.size < window:
        return np.empty(0, dtype=float)

    weights = np.repeat(1.0, window) / window
    return np.convolve(samples, weights, 'valid')
def plotnewcases(row, state='US', color='none'):
    """Plot the smoothed daily new-case curve for one state or the whole US.

    Adds a line to the module-level figure ``p`` and returns the data behind it.

    Args:
        row: Unused; kept so existing callers keep working.
        state: Value to match against ``state_cov_data.state``, or ``'US'``
            to aggregate all rows.
        color: Line color handed to ``p.line``.

    Returns:
        DataFrame with columns ``new`` (7-sample moving average of the
        day-over-day change in ``cases``) and ``days`` (0-based position in
        the smoothed series).
    """
    if state == 'US':
        rows = state_cov_data
        minimum_cases = 100
    else:
        rows = state_cov_data[state_cov_data.state == state]
        minimum_cases = 15

    # Aggregate only the `cases` column; summing the whole frame would also
    # "sum" unrelated columns (e.g. the string `state` column) for nothing.
    cases_by_date = rows.groupby('date')['cases'].sum()
    # Skip the earliest dates, before the total passes the threshold.
    cases_by_date = cases_by_date[cases_by_date > minimum_cases]

    # `cases` is treated as a running total, so consecutive differences give
    # the new cases per day.
    delta_cases = np.diff(cases_by_date.to_numpy())
    delta_cases_ma = movingaverage(delta_cases, 7)

    df = pd.DataFrame(delta_cases_ma, columns=['new'])
    df['days'] = df.index
    p.line(
        'days',
        'new',
        source=df,
        line_width=default_width,
        legend_label=state,
        color=color,
    )
    return df
# Tooltip shown when hovering a line: the point's index (day) and the
# smoothed new-case value formatted with thousands separators.
hover = HoverTool(
    tooltips=[
        ('day', '$index'),
        ('new cases', '@new{0,0}'),
    ]
)
p = figure(width=800, height=500, tools=[hover])
row = 1
# `states` can hold more entries than the palette has colors. Zipping the two
# would silently drop the surplus states, so wrap around the palette instead
# (colors repeat once the palette is exhausted).
for position, state in enumerate(states):
    plotnewcases(row, state, Spectral11[position % len(Spectral11)])
show(p)