-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathinteractive_worldmap_shiny_app.R
374 lines (316 loc) · 21.9 KB
/
interactive_worldmap_shiny_app.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
# In this blog, I will show you how to create interactive world maps and how to show these
# in the form of an R Shiny app. As the Shiny app cannot be embedded into this blog, I will
# direct you to the live app (https://fverkroost.shinyapps.io/interactive_worldmap_app/) and
# show you on my Github how to embed a Shiny app in your R Markdown files, which is a really
# cool and innovative way of preparing interactive documents. The link to this is here:
# (https://github.com/fverkroost/RStudio-Blogs/blob/master/interactive_worldmap_shiny_embedded.Rmd)
# To show you how to adapt the interface of the app to the choices of the users,
# we'll make use of two data sources such that the user can choose what data they want to
# explore and that the app adapts the possible input choices to the users' previous choices.
# The data sources here are about childlessness and gender inequality, which is the focus of
# my PhD research, where I computationally analyse the effects of gender and parental status
# on socio-economic inequalities. We'll start by loading and cleaning the data, whereafter we
# will build our interactive world maps in R Shiny. Let's first load the required packages.
if(!require(magrittr)) install.packages("magrittr", repos = "http://cran.us.r-project.org")
if(!require(rvest)) install.packages("rvest", repos = "http://cran.us.r-project.org")
if(!require(readxl)) install.packages("readxl", repos = "http://cran.us.r-project.org")
if(!require(plyr)) install.packages("plyr", repos = "http://cran.us.r-project.org")
if(!require(dplyr)) install.packages("dplyr", repos = "http://cran.us.r-project.org")
if(!require(maps)) install.packages("maps", repos = "http://cran.us.r-project.org")
if(!require(ggplot2)) install.packages("ggplot2", repos = "http://cran.us.r-project.org")
if(!require(reshape2)) install.packages("reshape2", repos = "http://cran.us.r-project.org")
if(!require(shiny)) install.packages("shiny", repos = "http://cran.us.r-project.org")
if(!require(ggiraph)) install.packages("ggiraph", repos = "http://cran.us.r-project.org")
if(!require(RColorBrewer)) install.packages("RColorBrewer", repos = "http://cran.us.r-project.org")
if(!require(countrycode)) install.packages("countrycode", repos = "http://cran.us.r-project.org")
if(!require(wbstats)) install.packages("wbstats", repos = "http://cran.us.r-project.org")
# Now, we can continue with loading our data. As we'll make world maps, we need a way to map our
# data sets to geographical data containing coordinates (longitude and latitude). As different
# data sets have different formats for country names (e.g. "United Kingdom of Great Britain and
# Northern Ireland" versus "United Kingdom"), we'll match country names to ISO3 codes to easily
# merge all data sets later on. Therefore, we first scrape an HTML table of country names, ISO3,
# ISO2 and UN codes for all countries worldwide. We use the *rvest* package using the XPath to
# indicate what part of the web page contains our table of interest. We use the pipeline (%>%)
# from the *magrittr* package to feed our URL of interest into functions that read the HTML table
# using the XPath and convert that to a data frame in R. One can obtain the XPath by hovering
# over the HTML table in developer mode on the browser and having show the XPath. The first
# element in the resulting list contains our table of interest, and as the first column is empty,
# we delete it. Also, as you can see from the HTML table in the link, there are some rows that
# show the letter of the alphabet before starting with a list of countries of which the name starts
# with that letter. As these rows contain the particular letter in all columns, we can delete these
# by deleting all rows for which all columns have equal values.
library(magrittr)
library(rvest)
library(countrycode)
iso_codes = countrycode::codelist[, c("un.name.en", "iso3c")]
names(iso_codes) = c("Country", "ISO3")
# Next, we'll collect our first data set, which is a data set on childlessness provided by the United
# Nations. We download the file from the link, save it locally and then load it into RStudio using the
# *read_excel()* function in the *readxl* package.
library(readxl)
url <- "https://www.un.org/en/development/desa/population/publications/dataset/fertility/wfr2012/Data/Data_Sources/TABLE%20A.8.%20%20Percentage%20of%20childless%20women%20and%20women%20with%20parity%20three%20or%20higher.xlsx"
destfile <- "dataset_childlessness.xlsx"
download.file(url, destfile)
childlessness_data <- read_excel(destfile)
head(childlessness_data)
# We can see that the childlessness data are a bit messy, especially when it comes to the first couple
# of rows and column names. We only want to maintain the columns that have country names, periods and
# childlessness estimates for different age groups, as well as the rows that refer to data for specific
# countries. The resulting data look much better. Note that when we'll convert the childlessness
# percentage columns to numeric type later on, the ".." values will automatically change to NA.
cols <- which(grepl("childless", childlessness_data[2, ]))
childlessness_data <- childlessness_data[-c(1:3), c(1, 3, cols:(cols + 2))]
names(childlessness_data) <- c("Country", "Period", "35-39", "40-44", "45-49")
head(childlessness_data)
# Our second data set is about measures of gender inequality, provided by the World Bank. We read this
# .csv file directly into RStudio from the API provided by the World Bank via package "wbstats".
library(wbstats)
library(plyr)
library(dplyr)
library(wbstats)
library(plyr)
library(dplyr)
# List the gender-relevant data sets from the World Bank
datasets = wb_search("gender", fields = c("indicator"))
# Try to load these relevant data sets into R
df_list = list()
for (i in 1:nrow(datasets)){
print(i)
df_list[[i]] <- tryCatch(
{ df = wb_data(datasets$indicator_id[i], start_date = 2000, end_date = 2020) },
error=function(cond) { return(NULL) }
)
}
# Remove the NULL data sets (returning an error from the World Bank API)
df_list = df_list[which(!unlist(lapply(df_list, is.null)))]
# Define the identification columns
id.cols = c("iso2c", "iso3c", "country", "date","unit", "obs_status", "footnote", "last_updated")
# Make sure the gender index data for each indicator are in a
# similar format as the other data to be used (as well as
# previous versions of these data)
recent_df_list = list()
for (i in 1:length(df_list)){
# Rename columns into relevant names
ind_idx = !names(df_list[[i]]) %in% id.cols
ind_nam = names(df_list[[i]])[ind_idx]
df_list[[i]]["Indicator"] = ind_nam
names(df_list[[i]])[ind_idx] = "Value"
missing_cols = id.cols[!id.cols %in% names(df_list[[i]])]
if (length(missing_cols) > 0){
for (j in 1:length(missing_cols)){
df_list[[i]][missing_cols[j]] = NA
}
}
# Add the most recent observations separately
recent_rows = as.data.frame(df_list[[i]] %>%
dplyr::filter(!is.na(Value) & !is.na(date)) %>%
dplyr::group_by(iso3c) %>%
dplyr::arrange(date, .by_group = T) %>%
dplyr::slice(tail(row_number(), 1)))
if (nrow(recent_rows) > 0){
recent_rows["date"] = "RecentYear"
df_list[[i]]$date = as.character(df_list[[i]]$date)
recent_df_list[[i]] = rbind(as.data.frame(df_list[[i]]), recent_rows)
}
}
# Rename columns to make them compatible with childlessness data
# and previous versions of these gender index data
gender_index_melt = rbind.fill(recent_df_list)
gender_index_melt$Indicator = datasets$indicator[match(gender_index_melt$Indicator, datasets$indicator_id)]
names(gender_index_melt)[names(gender_index_melt) == "country"] = "Country"
names(gender_index_melt)[names(gender_index_melt) == "iso3c"] = "ISO3"
names(gender_index_melt)[names(gender_index_melt) == "date"] = "Period"
keep_cols = c("ISO3", "Country", "Period", "Indicator", "Value")
gender_index_melt = gender_index_melt[, names(gender_index_melt) %in% keep_cols]
# Next, we load in our world data with geograpical coordinates directly from package *ggplot2*. These data
# contain geographical coordinates of all countries worldwide, which we'll later need to plot the worldmaps.
library(maps)
library(ggplot2)
world_data <- ggplot2::map_data('world')
world_data <- fortify(world_data)
head(world_data)
# To map our data, we need to merge the childlessness, gender gap index and world map data. As said before,
# these all have different notations for country names, which is why we'll use the ISO3 codes. However,
# even between the ISO code data and the other data sets, there is discrepancy in country names.
# Unfortunately, to solve this, we need to manually change some country names in our data to match those in
# the ISO code data set.
old_names <- c("Bolivia (Plurinational State of)", "Cabo Verde", "China, Hong Kong Special Administrative Region",
"China, Macao Special Administrative Region", "Congo", "Democratic People's Republic of Korea",
"Democratic Republic of the Congo", "Iran (Islamic Republic of)", "Lao People's Democratic Republic",
"Micronesia (Federated States of)", "Republic of Korea", "Republic of Moldova", "Saint Vincent and the Grenadines",
"State of Palestine", "Syrian Arab Republic", "The former Yugoslav Republic of Macedonia",
"United Kingdom of Great Britain and Northern Ireland", "United Republic of Tanzania",
"United States Virgin Islands", "Venezuela (Bolivarian Republic of)")
new_names <- c("Bolivia", "Cape Verde", "Hong Kong, SAR China", "Macao, SAR China", "Congo (Brazzaville)",
"Korea (North)", "Congo, (Kinshasa)", "Iran, Islamic Republic of", "Lao PDR", "Micronesia, Federated States of",
"Korea (South)", "Moldova", "Saint Vincent and Grenadines", "Palestinian Territory", "Syrian Arab Republic (Syria)",
"Macedonia, Republic of", "United Kingdom", "Tanzania, United Republic of", "Virgin Islands, US", "Venezuela (Bolivarian Republic)")
for (i in 1:length(old_names)){
childlessness_data$Country[childlessness_data$Country == old_names[i]] <- new_names[i]
}
old_names <- c("French Southern and Antarctic Lands", "Antigua", "Barbuda", "Saint Barthelemy", "Brunei", "Ivory Coast",
"Democratic Republic of the Congo", "Republic of Congo", "Falkland Islands", "Micronesia", "UK",
"Heard Island", "Cocos Islands", "Iran", "Nevis", "Saint Kitts", "South Korea", "Laos", "Saint Martin",
"Macedonia", "Pitcairn Islands", "North Korea", "Palestine", "Russia", "South Sandwich Islands",
"South Georgia", "Syria", "Trinidad", "Tobago", "Taiwan", "Tanzania", "USA", "Vatican", "Grenadines",
"Saint Vincent", "Venezuela", "Vietnam", "Wallis and Fortuna")
new_names <- c("French Southern Territories", rep("Antigua and Barbuda", 2), "Saint-Barthélemy",
"Brunei Darussalam", "Côte d'Ivoire", "Congo, (Kinshasa)", "Congo (Brazzaville)",
"Falkland Islands (Malvinas)", "Micronesia, Federated States of", "United Kingdom",
"Heard and Mcdonald Islands", "Cocos (Keeling) Islands", "Iran, Islamic Republic of",
rep("Saint Kitts and Nevis", 2), "Korea (South)", "Lao PDR", "Saint-Martin (French part)",
"Macedonia, Republic of", "Pitcairn", "Korea (North)", "Palestinian Territory", "Russian Federation",
rep("South Georgia and the South Sandwich Islands", 2),
"Syrian Arab Republic (Syria)", rep("Trinidad and Tobago", 2), "Taiwan, Republic of China",
"Tanzania, United Republic of", "United States of America", "Holy See (Vatican City State)",
rep("Saint Vincent and Grenadines", 2), "Venezuela (Bolivarian Republic)", "Viet Nam", "Wallis and Futuna Islands")
for (i in 1:length(old_names)){
world_data$region[world_data$region == old_names[i]] <- new_names[i]
}
# Now the name changes for countries have been made, we can add the ISO3 codes to our childlessness and world
# map data. The gender gap index data already contain these codes, so there's no need for us to add these there.
childlessness_data['ISO3'] <- iso_codes$ISO3[match(childlessness_data$Country, iso_codes$Country)]
world_data["ISO3"] <- iso_codes$ISO3[match(world_data$region, iso_codes$Country)]
# Next, we melt the childlessness and gender gap index data into long format such that they will have similar
# shape and column names for merging. The *melt()* function is included in package *reshape2*. The goal here
# is to create variables that have different unique values for the different data, such that I can show you
# how to adapt the R Shiny app input to the users' choices. For example, we'll create a *DataType* column that
# has value *Childlessness* for the rows of the childlessness data and value *Gender Gap Index* for all rows
# of the gender gap index data. We'll also create a column *Period* that contains earlier, middle and later
# periods for the childlessness data and different years for the gender gap index data. As such, when the
# user chooses to explore the childlessness data, the input for the period will only contain the choices
# relevant to the childlessness data (i.e. earlier, middle and later periods and no years). When the user
# chooses to explore the gender gap index data, they will only see different years as choices for the input
# of the period, and not earlier, middle and later periods. The same goes for the *Indicator* column. This
# may sound slightly vague at this point, but we'll see this in practice later on when building the R Shiny app.
library(reshape2)
childlessness_melt <- melt(childlessness_data, id = c("Country", "ISO3", "Period"),
variable.name = "Indicator", value.name = "Value")
childlessness_melt$Value <- as.numeric(childlessness_melt$Value)
# After melting the data and ensuring they're in the same format, we merge them together using the *rbind()*
# function, which we can do here because the data have the same colum names.
childlessness_melt["DataType"] <- rep("Childlessness", nrow(childlessness_melt))
gender_index_melt["DataType"] <- rep("Gender Gap Index", nrow(gender_index_melt))
df <- rbind(childlessness_melt, gender_index_melt)
df[] <- lapply(df, as.character)
df$Value <- as.numeric(df$Value)
# Next, it's time to define the function that we'll use for building our world maps. The inputs to this
# function are the merged data frame, the world data containing geographical coordinates, and the data type,
# period and indicator the user will select in the R Shiny app. We first define our own theme, *my_theme()*
# for setting the aesthetics of the plot. Next, we select only the data that the user has selected to view,
# resulting in *plotdf*. We keep only the rows for which the ISO3 code has been specified. For some countries
# (e.g. Channel Islands in the childlessness data), this was not the case, as these are not contained in the
# ISO code data. We then add the data the user wants to see to the geographical world data. Finally, we plot
# the world map. The most important part of this plot is that contained in the *geom_polygon_interactive()*
# function from the *ggiraph* package. This function draws the world map in white with grey lines, fills it
# up according to the value of the data selected (either childlessness or gender gap rank) in a red-to-blue
# color scheme set using the *brewer.pal()* function from the *RColorBrewer* package, and interactively shows
# at the tooltip the ISO3 code and value when hovering over the plot.
worldMaps <- function(df, world_data, data_type, period, indicator){
# Function for setting the aesthetics of the plot
my_theme <- function () {
theme_bw() + theme(axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
legend.position = "bottom",
panel.border = element_blank(),
strip.background = element_rect(fill = 'white', colour = 'white'))
}
# Select only the data that the user has selected to view
plotdf <- df[df$Indicator == indicator & df$DataType == data_type & df$Period == period,]
plotdf <- plotdf[!is.na(plotdf$ISO3), ]
# Add the data the user wants to see to the geographical world data
world_data['DataType'] <- rep(data_type, nrow(world_data))
world_data['Period'] <- rep(period, nrow(world_data))
world_data['Indicator'] <- rep(indicator, nrow(world_data))
world_data['Value'] <- plotdf$Value[match(world_data$ISO3, plotdf$ISO3)]
# Create caption with the data source to show underneath the map
capt <- paste0("Source: ", ifelse(data_type == "Childlessness", "United Nations" , "World Bank"))
# Specify the plot for the world map
library(RColorBrewer)
library(ggiraph)
g <- ggplot() +
geom_polygon_interactive(data = subset(world_data, lat >= -60 & lat <= 90), color = 'gray70', size = 0.1,
aes(x = long, y = lat, fill = Value, group = group,
tooltip = sprintf("%s<br/>%s", ISO3, Value))) +
scale_fill_gradientn(colours = brewer.pal(5, "RdBu"), na.value = 'white') +
labs(fill = data_type, color = data_type, title = NULL, x = NULL, y = NULL, caption = capt) +
my_theme()
return(g)
}
# Now we have our data and world mapping function ready and specified, we can start building our R Shiny app.
# If you're not in any way familiar with R Shiny, I recommend you to have a look at the Getting Started guide
# (https://shiny.rstudio.com/tutorial/) first. we can build our app by specifying the UI and server. In the UI,
# we include a fixed user input selection where the user can choose whether they want to see the childlessness
# or gender gap index data. We further include dynamic inputs for the period and indicators the user wants to see.
# As mentioned before, these are dynamic because the choices shown will depend on the selections made by the user
# on previous inputs. We then use the *ggiraph* package to output our interactive world map. We use the
# *sidebarLayout* to be able to show the input selections on the left side and the world map on its right side,
# rather than the two underneath each other. Everything that depends on the inputs by the user needs to be specified
# in the server function, which is here not only the world map creation but also the second and third input choises,
# as these depend on the previous inputs made by the user. For example, when we run the app later we'll see that
# when the user selects the childlessness data for the first input for data type, the third indicator input will
# only show age groups, and the text above the selector will also show "age group", whereas when the user selects
# the gender gap index data, the third indicator will show different measures and the text above the selector
# will show "indicator" rather than "age group".
library(shiny)
library(ggiraph)
# Define the UI
ui = fluidPage(
# App title
titlePanel("Childlessness and Gender Gap Index Data"),
# Sidebar layout with input and output definitions
sidebarLayout(
# Sidebar panel for inputs
sidebarPanel(
# First input: Type of data
selectInput(inputId = "data_type",
label = "Choose the type of data you want to see:",
choices = list("Childlessness" = "Childlessness", "Gender Gap Index" = "Gender Gap Index")),
# Second input (choices depend on the choice for the first input)
uiOutput("secondSelection"),
# Third input (choices depend on the choice for the first and second input)
uiOutput("thirdSelection")
),
# Main panel for displaying outputs
mainPanel(
# Hide errors
tags$style(type = "text/css",
".shiny-output-error { visibility: hidden; }",
".shiny-output-error:before { visibility: hidden; }"),
# Output: interactive world map
girafeOutput("distPlot")
)
)
)
# Define the server
server = function(input, output) {
# Create the interactive world map
output$distPlot <- renderGirafe({
ggiraph(code = print(worldMaps(df, world_data, input$data_type, input$period, input$indicator)))
})
# Change the choices for the second selection on the basis of the input to the first selection
output$secondSelection <- renderUI({
choice_second <- as.list(unique(df$Period[which(df$DataType == input$data_type)]))
selectInput(inputId = "period", choices = choice_second,
label = "Choose the period for which you want to see the data:")
})
# Change the choices for the third selection on the basis of the input to the first and second selections
output$thirdSelection <- renderUI({
lab <- ifelse(input$data_type == "Childlessness", "age group", "indicator")
choice_third <- as.list(unique(df$Indicator[df$DataType == input$data_type & df$Period == input$period]))
selectInput(inputId = "indicator", choices = choice_third,
label = paste0("Choose the type of ", lab, " you want to explore:"))
})
}
# Finally, we can run our app by either clicking "Run App" in the top of our RStudio IDE, or by running
shinyApp(ui = ui, server = server)
# You can check out the live app here (https://fverkroost.shinyapps.io/interactive_worldmap_app/). In this
# post (https://github.com/fverkroost/RStudio-Blogs/blob/master/interactive_worldmap_shiny_embedded.Rmd)
# on my Github, you can also see how to embed a Shiny app in your R Markdown files, which is a really cool
# and innovative way of preparing interactive documents.
# Now try selecting different inputs and see how the input choices change when doing so. Also, don't
# forget to try hovering over the world map to see different data values for different countries interactively!