forked from ourcodingclub/CC-Linear-mixed-models
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMixed_Model_SS_Adam.R
264 lines (173 loc) · 9.27 KB
/
Mixed_Model_SS_Adam.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
## load the data and have a look at it
setwd("~/GitHub/CC-Linear-mixed-models")
# https://ourcodingclub.github.io/tutorials/mixed-models/
library(tidyverse)
library(lme4)
library(emmeans)
library(MuMIn)
library(patchwork)
ss_data <- read_csv("stepping_stones_data.csv")
head(ss_data)
## Let's say we want to know how the body length affects test scores.
## Have a look at the data distribution:
hist(ss_data$Met_Pwr) # somewhat normal, but not bimodal
## It is good practice to standardise your explanatory variables before proceeding - you can use scale() to do that:
## this means that is is standardized toa percentage of the distribution...
ss_data$COV_s <- scale(ss_data$COV)
## Back to our question: is Met_Int affected by COV?
###---- Fit all data in one analysis -----###
## One way to analyse this data would be to try fitting a linear model to all our data, ignoring the sites and the mountain ranges for now.
library(lme4)
library(dplyr)
basic.lm <- lm(Met_Pwr ~ COV, data = ss_data)
summary(basic.lm)
## Let's plot the data with ggplot2
library(ggplot2)
ggplot(ss_data, aes(x = Subject, y = Met_Pwr)) +
geom_point()+
geom_smooth(method = "lm")
### Assumptions?
## Plot the residuals - the red line should be close to being flat, like the dashed grey line
plot(basic.lm, which = 1) # not perfect, but look alright
## Have a quick look at the qqplot too - point should ideally fall onto the diagonal dashed line
plot(basic.lm, which = 2) # a bit off at the extremes, but that's often the case; again doesn't look too bad
## However, what about observation independence? Are our data independent?
## We collected multiple samples from 18 participants...
## It's perfectly plausible that the data from within each participant are more similar to each other than the data from different participants - they are correlated. Pseudoreplication isn't our friend.
## Have a look at the data to see if above is true
boxplot(Met_Pwr ~ Subject, data = ss_data) # certainly looks like something is going on here
# ## We could also plot it colouring points by subject
# ggplot(ss_data, aes(x = COV2, y = Met_Int, colour = Subject))+
# geom_point(size = 2)+
# theme_classic()+
# theme(legend.position = "none")
# ### WANT MORE COLOR ###
# ## From the above plots it looks like our met cost vary both in the COV and in their subject characteristics. This confirms that our observations from within each of the met cost aren't independent. We can't ignore that.
## So what do we do?
###----- Run multiple analyses -----###
## We could run many separate analyses and fit a regression for each of the subjects.
## Lets have a quick look at the data split by subjects
## We use the facet_wrap to do that
ggplot(aes(COV, Met_Pwr), data = ss_data) + geom_point() +
facet_wrap(~ Subject) +
xlab("COV") + ylab("Metabolic power")
##----- Modify the model -----###
## We want to use all the data, but account for the data coming from different subjects
## let's add subjects as a fixed effect to our basic.lm
#### HELP WITH INTERP: https://www.r-bloggers.com/2009/11/r-tutorial-series-simple-linear-regression/
Subjects.lm <- lm(Met_Pwr ~ COV + Subject, data = ss_data)
summary(Subjects.lm)
#### COV IS ACTUALLY VERY SIGNIFICANT, but we are uncertain whether the variability in the subject characteristics
## are influencing the values that we are getting. We want to account for that in our interpretation of the data.
# thus we will try to interpret that variability
###----- Mixed effects models -----###
# A mixed model is a good choice here: it will allow us to use all the data we
# have (higher sample size) and account for the correlations between data coming
# from the subjects. We will also estimate fewer parameters and
# avoid problems with multiple comparisons that we would encounter while using
# separate regressions. We are going to work in lme4, so load the package
# (or use install.packages if you don’t have lme4 on your computer
# install.packages("sjstats")
library(lme4)
library(sjstats)
mixed.lmer <- lmer(Met_Pwr ~ COV + (1 | Subject), data = ss_data)
summary(mixed.lmer)
### this is Dan's stuff that should be ignored for now
a <- summary(mixed.lmer2) # from DAN
r_sq <- r.squaredGLMM(mixed.lmer2)
#r_sq value tells me that the correlation for estimated metabolic power for each condition of variability is
# 0.68 for no pooling
## will output ICC, adjusted ICC and unadjusted ICC
## MATCHES WHAT DAN WROTE ABOUT IN HIS EMAIL
# take variance for =Subjects and divide by total variance
0.03823/(0.03823+0.02212) #63%
#we can see that Subject was clearly important: it explains a lot of the variation
#we know that because we can take the variance for Subjects and divide it by the total variance
#So the difference between Subjects explains ~63% of the variance that is "left over"
# after the variance explained by our fixed effects... which is the relationship between metabolic cost
# and the coefficient of variablity
plot(mixed.lmer) # looks alright, no patterns evident
qqnorm(resid(mixed.lmer))
qqline(resid(mixed.lmer)) # points fall moderately nicely onto the line - good!
#points are fitting better onto the line... but not perfectly...
# https://data.library.virginia.edu/understanding-q-q-plots/
# but I think we are OK...
(mm_plot <- ggplot(ss_data, aes(x = COV, y = Met_Pwr, colour = Subject)) +
facet_wrap(~Subject, nrow=3) + # a panel for each mountain range
geom_point(alpha = 0.5) +
theme_classic() +
geom_line(data = cbind(ss_data, pred = predict(mixed.lmer)), aes(y = pred), linewidth = 1) + # adding predicted line from mixed model
theme(legend.position = "none",
panel.spacing = unit(2, "lines")) # adding space between panels
)
# data presented for each subject with an adjusted intercept but a constant slope
# https://mfviz.com/hierarchical-models/
mixed.ranslope <- lmer(Met_Pwr ~ COV + (1 + COV|Subject), data = ss_data)
summary(mixed.ranslope)
### plot
(mm_plot <- ggplot(ss_data, aes(x = COV, y = Met_Pwr, colour = Subject)) +
facet_wrap(~Subject, nrow=3) + # a panel for each mountain range
geom_point(alpha = 0.5) +
theme_classic() +
geom_line(data = cbind(ss_data, pred = predict(mixed.ranslope)), aes(y = pred), linewidth = 1) + # adding predicted line from mixed model #and applies geometric but applies it individually
theme(legend.position = "none",
panel.spacing = unit(2, "lines")) # adding space between panels
)
install.packages("ggeffects")
library(ggeffects) # install the package first if you haven't already, then load it
# Extract the prediction data frame
# Plot the predictions
#extract predictions from dataframe
pred.mm <- ggpredict(mixed.lmer, terms = c("COV")) # this gives overall predictions for the model
# c() returns as a vector list
# final plotted slope across each subject
(ggplot(pred.mm) +
geom_line(aes(x = x, y = predicted)) + # slope
geom_ribbon(aes(x = x, ymin = predicted - std.error, ymax = predicted + std.error),
fill = "lightgrey", alpha = 0.5) + # error band
geom_point(data = ss_data, # adding the raw data (scaled values)
aes(x = COV, y = Met_Pwr, colour = Subject)) +
labs(x = "COV", y = "Met_Pwr",
title = "COV does affect Met_Int in Subjects") +
theme_minimal()
)
#plot each of the predicted slope wrapped to each subject independently
ggplot(ss_data, aes(x = COV, y = Met_Pwr, colour = Subject)) +
facet_wrap(~Subject, nrow=3) + # a panel for each subject
geom_point(alpha = 0.5) +
theme_classic() +
geom_line(data = cbind(ss_data, pred = predict(mixed.ranslope)), aes(y = pred), size = 1) + # adding predicted line from mixed model #and applies geometric but applies it individually
theme(legend.position = "none",
panel.spacing = unit(2, "lines")) # adding space between panels
# combining and incorperating unique slope
predslm = predict(mixed.ranslope, interval = "confidence")
head(predslm)
require(ggplot2)
install.packages("ggpubr")
install.packages("rempsyc")
pub_plot <- ggplot(ss_data, aes(x=COV, y=Met_Pwr, colour=Subject))+
geom_point(alpha = 0.25)+
geom_line(data = cbind(ss_data, pred = predict(mixed.ranslope)), aes(y = pred), size = 1)
# geom_ribbon( aes(ymin = lwr, ymax = upr, fill = Subject, color = NULL), alpha = .15)
pub_plot + theme_bw() + theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
coef(mixed.ranslope)
predict(mixed.ranslope)
# #cbind combines two rows of data together
# #basically we add predicted to the end of ss_data, temporarily that gets fed in.
#
# temp <- cbind(ss_data, pred = predict(mixed.ranslope))
# pred = predict(mixed.ranslope) ## is our 18 participants and the four data points for predicted...
# view(pred)
#
# ggplot(ss_data, aes(x = COV, y = Met_Pwr, colour = Subject)) +
# geom_line(data = cbind(ss_data, pred = predict(mixed.ranslope)), size = 1) # adding predicted line from mixed model #and applies geometric but applies it individually
#
# ggplot(ss_data, aes(x = COV, y = Met_Pwr, colour = Subject)) +
# geom_line(data = cbind(ss_data, pred = predict(mixed.ranslope)), aes(y = pred), size = 1)
# #this is what the predicted slopes would look like, mapped for the mixed.ranslope LMER.
#
#
#
#
#