-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdemo.R
136 lines (107 loc) · 4.57 KB
/
demo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Load packages & data ----------------------------------------------------
# Load packages
library(dplyr)
library(ggplot2)
library(scales)
library(correlation)
library(sjPlot)
# Load the data
data <- read.csv("./data/ZA5665_GESIS_Panel_v49-0-0_CSV_synthetic-data.csv")
# Data Wrangling ----------------------------------------------------------
# Rename variables
data <- data |>
rename(party_id = jazc039a,
survey_eval = jczq009a,
facebook = jczf030a,
insta = jczf032a,
tiktok = jczf039a,
compassion_refugees = jczj074a,
sympathy_refugees = jczj075a,
refugees_neighbor = jczj079a,
connection_eu = jcdu080a,
ukraine_eu = jcdu081a,
russia_threat = jcdu082a,
trust_people = jcct087a)
# Recode sex into a factor with 1 = male and 2 = female
data <- data |>
mutate(sex_factor = factor(sex, levels = c(1, 2),
labels = c("male", "female")))
# Compute age & create age groups based on the reported year of birth
data <- data |>
mutate(age = year - yob)
# Create age groups
data <- data |>
mutate(age_group = factor(case_when(age < 30 ~ "Under 30",
age < 40 ~ "30 to 39",
age < 50 ~ "40 to 49",
age < 60 ~ "50 to 59",
TRUE ~ "60+"),
levels = c("Under 30",
"30 to 39",
"40 to 49",
"50 to 59",
"60+"),
ordered = TRUE))
# Recode party_id into a factor with 1: CDU/CSU, 2: SPD, 3: FDP, 4: DIE LINKE, 5: GRUENE, 6: AfD, 7: Other party
data <- data |>
mutate(party_factor = factor(party_id, levels = c(1, 2, 3, 4, 5, 6, 7),
labels = c("CDU/CSU", "SPD", "FDP", "DIE LINKE", "GRUENE", "AfD", "Other party")))
# Sample descriptives -----------------------------------------------------
summary(data$age)
sd(data$age)
table(data$age_group)
table(data$sex_factor)
prop.table(table(data$facebook))*100
prop.table(table(data$insta))*100
prop.table(table(data$tiktok))*100
# Descriptive group comparisons -------------------------------------------
## Frequency tables -------------------------------------------------------
round(prop.table(table(data$sex_factor, data$facebook), margin = 1)*100, 2)
round(prop.table(table(data$age_group, data$facebook), margin = 1)*100, 2)
round(prop.table(table(data$sex_factor, data$insta), margin = 1)*100, 2)
round(prop.table(table(data$age_group, data$insta), margin = 1)*100, 2)
round(prop.table(table(data$sex_factor, data$tiktok), margin = 1)*100, 2)
round(prop.table(table(data$age_group, data$tiktok), margin = 1)*100, 2)
## Plots ------------------------------------------------------------------
# Party ID per age group
# Calculate the percentage of party identification per age group
percentage_party_identification <- data |>
filter(!is.na(party_factor)) |>
group_by(age_group, party_factor) |>
summarize(n = n()) |>
mutate(percentage = n / sum(n) * 100)
# Create a grouped bar chart using ggplot
ggplot(percentage_party_identification, aes(x = age_group, y = percentage, fill = party_factor)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Party Identification per Age Group",
x = "Age Group",
y = "Percentage",
fill = "Party Identification") +
scale_fill_manual(values = c("#000000", "#FF0000", "#FFD700", "#800080", "#008000", "#0000FF", "#808080")) +
theme_minimal()
# Correlation analysis ----------------------------------------------------
data |>
select(survey_eval,
compassion_refugees,
sympathy_refugees,
refugees_neighbor,
connection_eu,
russia_threat,
trust_people) |>
correlation()
# Regression analysis -----------------------------------------------------
# Linear regression predicting survey evaluation
reg1 <- lm(survey_eval ~ age + sex_factor + trust_people, data = data)
summary(reg1)
plot_model(reg1,
show.values = TRUE,
value.offset = .3,
type = "std",
title = "Linear regression predicting survey evaluation")
# Logistic regression predicting Instagram use
reg2 <- glm(insta ~ age + sex_factor + facebook, data = data, binomial(link = "logit"))
summary(reg2)
plot_model(reg2,
show.values = TRUE,
value.offset = .3,
title = "Logistic regression predicting Instagram use")