-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmonkeypox.py
130 lines (110 loc) · 4.95 KB
/
monkeypox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from bs4 import BeautifulSoup
from selenium import webdriver
import re
import pandas as pd
import folium
import webbrowser
import os
import platform
def get_data():
    """
    Scrape the CDC monkeypox U.S. map page for the per-state case counts.

    A Chrome window is driven through Selenium (the ChromeDriver binary is
    looked up in the same directory as this file), the rendered page source is
    parsed with BeautifulSoup, and the browser is always closed afterwards.

    @return:
        date:
            date when data was last updated on webpage, with month as 3 letter abbreviation (Month Day Year)
        state_case_dict:
            Key is state and value is case amount. Dictionary contains monkeypox cases for each state,
            in the order the rows appear in the page's table
            ie. {'Alabama': 20, 'Alaska': 40, ...}
    @raise:
        AttributeError:
            when one of the expected page elements is missing (a lookup returned None)
        IndexError:
            when the "last updated" text has fewer than six space-separated words
    """
    # The driver binary is "chromedriver.exe" on Windows and plain "chromedriver"
    # everywhere else, so every platform gets a value (not only Windows/macOS).
    driver_name = "chromedriver.exe" if platform.system() == "Windows" else "chromedriver"
    project_root = os.path.abspath(os.path.dirname(__file__))
    driver_bin = os.path.join(project_root, driver_name)

    # Opens the webpage in Chrome and retrieves the rendered html text
    url = "https://www.cdc.gov/poxvirus/monkeypox/response/2022/us-map.html"
    # NOTE(review): `executable_path` is the Selenium 3 way of pointing at the
    # driver binary; newer Selenium releases expect a `Service` object instead.
    # Confirm against the installed Selenium version.
    driver = webdriver.Chrome(executable_path=driver_bin)
    try:
        driver.maximize_window()
        driver.get(url)
        content = driver.page_source.encode('utf-8').strip()
    finally:
        # Always shut the browser down, even when loading the page fails,
        # so no Chrome/ChromeDriver process is left behind.
        driver.quit()

    # Searches for tags in html to find table containing states and cases
    soup = BeautifulSoup(content, 'lxml')
    page = soup.find('div', class_='container d-flex flex-wrap body-wrapper bg-white')
    data = page.find('div', class_='cdc-open-viz-module cdc-map-outer-container md')
    table = data.find('div', class_='table-container')

    # Searches for date when the data was last updated: words 3, 4 and 5 of the
    # span's text are re-ordered into "<word 4> <word 3> <word 5>"
    title = page.find('div', class_='syndicate')
    date_words = title.find('span', id='dateoutputspan').text.split(' ')
    date = f"{date_words[4]} {date_words[3]} {date_words[5]}"

    # Compiled once, outside the row loop; raw strings keep "\s" a regex escape
    name_chars = re.compile(r"[a-zA-Z\s]")
    digit_chars = re.compile(r"[0-9]")
    # Rows that are deliberately left out of the result
    # (presumably because the state map has no shape for them -- confirm)
    excluded = {'District Of Columbia', 'Puerto Rico'}

    # Saves each state and corresponding cases into state_cases_dict
    state_cases_dict = {}
    for row in table.find_all('tr'):
        # Drop thousands separators so "1,234" is read as the number 1234
        row_text = row.text.replace(',', '')
        # Letters/whitespace form the state name, digits form the case count;
        # strip() removes stray whitespace around the name
        state = "".join(name_chars.findall(row_text)).strip()
        cases = "".join(digit_chars.findall(row_text))
        # Rows without any digit (e.g. a header row) carry no count and are skipped
        if cases and state not in excluded:
            state_cases_dict[state] = int(cases)
    return date, state_cases_dict
def create_map(updated_date, state_cases_dict):
    """
    Draw a choropleth map of cases per state, save it as "monkeypox_map.html"
    in the current working directory and open it in a web browser.

    @param
        updated_date:
            Date when data was last updated on the website. Month Day Year, with month as 3-letter abbreviation
        state_cases_dict:
            Key is state and value is case amount. Dictionary contains monkeypox cases for each state
            ie. {'Alabama': 20, 'Alaska': 40, ...}
    """
    # Local import: only needed here, to turn the saved file into a file:// URL
    from pathlib import Path

    # Use GeoJSON geometry found from URL to create state shapes
    state_shapes = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json"

    # Separate states and cases into lists, then creates Pandas DataFrame in order for Folium to map data
    states_list = list(state_cases_dict.keys())
    cases_list = list(state_cases_dict.values())
    state_cases_df = pd.DataFrame({'State': states_list, 'Cases': cases_list})

    # Note starting position of map and creates map elements (title/header, legend)
    map_center = [37.5, -95]
    map_usa = folium.Map(location=map_center, zoom_control=True, zoom_start=5)
    legend_name = "Data from CDC, last updated " + updated_date
    total_cases = f'{sum(cases_list):,}'  # Adds commas to total cases number for better clarity
    loc = f"Map of {total_cases} Confirmed Monkeypox Cases in US States, by Edward Wang"
    title_html = f'''
    <h3 align="center" style="font-size:16px"><b>{loc}</b></h3>
    '''
    map_usa.get_root().html.add_child(folium.Element(title_html))

    # Create Folium Choropleth with corresponding data, using GeoJSON and Pandas DataFrame.
    # Rows are joined to shapes by comparing the 'State' column with each
    # feature's properties.name.
    folium.Choropleth(
        geo_data=state_shapes,
        name="Choropleth of Monkeypox in USA",
        data=state_cases_df,
        columns=['State', 'Cases'],
        key_on='feature.properties.name',
        fill_color="YlOrRd",
        fill_opacity=0.5,
        line_opacity=0.2,
        legend_name=legend_name,
        highlight=True,
        bins=8,
    ).add_to(map_usa)

    # Save the map, then open it in a new browser window on every platform.
    # as_uri() yields a properly escaped absolute file:// URL, so directories
    # containing spaces work and no per-OS branch is needed.
    output_path = Path("monkeypox_map.html").resolve()
    map_usa.save(str(output_path))
    if not webbrowser.open(output_path.as_uri(), new=1):
        # The map is still on disk; tell the user where to find it
        print(f"Map saved to {output_path}, but no web browser could be opened")
def main():
    """
    Driver function that calls other functions.

    Retrieves the data with get_data() and passes it on to create_map().
    Any error raised by either step is reported on standard output instead of
    being propagated. When the data cannot be retrieved the map step is
    skipped, so only the error that actually happened is printed.
    """
    try:
        updated_date, state_cases_dict = get_data()
    except Exception as e:
        print(f"Could not retrieve data, {e}")
        # Nothing to draw without data; stopping here avoids a second,
        # misleading "Could not create map" message about a missing variable.
        return
    try:
        create_map(updated_date, state_cases_dict)
    except Exception as e:
        print(f"Could not create map, {e}")
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()