-
Notifications
You must be signed in to change notification settings - Fork 0
/
5b_accessed_data_grouped.R
131 lines (113 loc) · 5.79 KB
/
5b_accessed_data_grouped.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
### Using collaborative open science tools to improve engagement with the
# ecology of the Guana River Estuary
# Geraldine Klarenberg, PhD
# 17 May 2023
# Visualizing "have accessed" data - GROUPED PER DATASET
# Start all runs of this script with:
renv::restore()
# This ensures it uses the packages last used when everything worked okay. This
# also ensures these packages are installed if you don't have them
library(tidyverse)
library(cowplot)
#### Load data --------------------------------------------------
# Read the survey-results CSV into `yes_data`; every plot below is built from
# this one table (columns used later: field_no, field_name, qname_main, q_text).
yes_data <- readr::read_csv(
  file = "2_data_deidentified/subsets/data_yes_results_basic.csv"
)
# This script groups the questions and answers together by dataset
# YD-2 - How do you most frequently obtain or access Guana Estuary data on [Field-1]?
# YD-3 - What are the advantages of this primary method of accessing or obtaining Guana Estuary data on [Field-1]?
# YD-4 - What are the disadvantages of the primary method of accessing or obtaining Guana Estuary data on [Field-1]?
# YD-5 - How often do/did you access or obtain Guana Estuary data on [Field-1]?
# YD-6 - What do you typically use Guana Estuary data on [Field-1] for?
# YD-7 - How well do Guana Estuary data on [Field-1] generally satisfy your need(s)?
# Build one figure per dataset ("field"): six stacked bar charts, one for each
# of the questions YD-2 to YD-7 listed above, saved as a single JPG.
# The ranked questions (YD-2, -3, -4, -6) each get their own viridis palette;
# the two ordinal questions (YD-5, -7) use a single fill colour.

# Percentage of answers per answer option (q_text) for one question.
#   responses: rows of yes_data belonging to a single dataset
#   question:  value of qname_main to keep, e.g. "YD-2"
# Returns a tibble with columns q_text, cnt and percentage (rounded to 1 dp).
summarise_answer_shares <- function(responses, question) {
  responses %>%
    # .env$ forces the function argument to be used, even if the data were to
    # contain a column named `question`
    filter(qname_main == .env$question) %>%
    count(q_text, name = "cnt") %>%
    mutate(percentage = round(cnt / sum(cnt) * 100, 1))
}

# Horizontal bar chart with the answers sorted by percentage and coloured per
# answer using the viridis palette `palette` ("viridis", "mako", ...).
plot_ranked_answers <- function(answer_shares, subtitle, palette) {
  ggplot(answer_shares,
         aes(y = reorder(q_text, percentage), x = percentage, fill = q_text)) +
    geom_col() +
    labs(x = "", y = "", subtitle = subtitle) +
    scale_fill_viridis_d(option = palette) +
    theme_bw() +
    theme(legend.position = "none",
          plot.title.position = "plot")
}

# Horizontal bar chart for ordinal answers: `answer_levels` fixes the order of
# the bars (instead of sorting by percentage); all bars are filled `bar_fill`.
plot_ordered_answers <- function(answer_shares, subtitle, answer_levels,
                                 bar_fill) {
  ggplot(answer_shares,
         aes(y = factor(q_text, levels = answer_levels), x = percentage)) +
    geom_col(fill = bar_fill) +
    labs(x = "", y = "", subtitle = subtitle) +
    theme_bw() +
    theme(legend.position = "none",
          plot.title.position = "plot")
}

# Create levels so the ordinal answers stay in the correct order
frequency_levels <- c("Daily", "At least once a week",
                      "2-3 times a month", "Once a month",
                      "Once every 6 months", "Once every year",
                      "Less than once a year")
satisfaction_levels <- c("Extremely well", "Very well",
                         "Moderately well", "Slightly well",
                         "Not well at all")

# Make sure the output folder exists, so ggsave() does not fail on a fresh
# checkout of the project
output_dir <- "8_results"
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# There are 8 dataset options respondents could choose as "data accessed".
# Loop over all of them.
for (i in 1:8) {
  field_rows <- yes_data %>%
    filter(field_no == i)

  # Without any responses there is no dataset name and nothing to plot
  if (nrow(field_rows) == 0) {
    message("No responses found for dataset ", i, "; skipping its plots.")
    next
  }

  data_name <- pull(field_rows, field_name)[1]

  # Only the top panel carries the dataset name as the figure title
  obtain <- plot_ranked_answers(
    summarise_answer_shares(field_rows, "YD-2"),
    subtitle = "How do you most frequently obtain or access these data?",
    palette = "viridis"
  ) +
    labs(title = data_name)

  adv <- plot_ranked_answers(
    summarise_answer_shares(field_rows, "YD-3"),
    subtitle = "What are the advantages of this primary method of accessing or obtaining these data?",
    palette = "mako"
  )

  disadv <- plot_ranked_answers(
    summarise_answer_shares(field_rows, "YD-4"),
    subtitle = "What are the disadvantages of this primary method of accessing or obtaining these data?",
    palette = "rocket"
  )

  howoften <- plot_ordered_answers(
    summarise_answer_shares(field_rows, "YD-5"),
    subtitle = "How often do/did you access or obtain these data?",
    answer_levels = frequency_levels,
    bar_fill = "green3"
  )

  use <- plot_ranked_answers(
    summarise_answer_shares(field_rows, "YD-6"),
    subtitle = "What do you typically use these data for?",
    palette = "plasma"
  )

  satisf <- plot_ordered_answers(
    summarise_answer_shares(field_rows, "YD-7"),
    subtitle = "How well do these data generally satisfy your need(s)?",
    answer_levels = satisfaction_levels,
    bar_fill = "skyblue2"
  )

  # Stack the six panels in one column (not named `all`, to avoid masking
  # base::all) and write one JPG per dataset
  combined <- plot_grid(obtain, adv, disadv, howoften, use, satisf,
                        ncol = 1, align = "v")
  ggsave(filename = file.path(output_dir,
                              paste0("yes_access_plots_", i, ".jpg")),
         plot = combined,
         width = 7, height = 15)
}