-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsim_anova_mxm.R
139 lines (118 loc) · 7.42 KB
/
sim_anova_mxm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#-------------------------------------------------------------------------
# Simulation Exercise 3 by DVM Bishop
# 20th March 2017
# Simulating data for multiway ANOVA
#-------------------------------------------------------------------------
## setwd('/Users/au194693/projects/repro17/anova_mxm_sim') # Here is command to set working directory to Dropbox/BBSCR_STARS/Bishop on my mac
# Remember! you will need to download these packages if they aren't already downloaded
library(reshape2) # for converting data from wide to long form (see below)
library(gridExtra) #for plotting output in a grid
library(grid) #https://cran.r-project.org/web/packages/gridExtra/vignettes/tableGrob.html
require(lme4)
options(scipen=999) #disable scientific notation.
set.seed(183)
#-------------------------------------------------------------------------
# We are going to simulate data for a 3 way mixed anova: between/within/within
# Our dependent variable is Score.
# We have two equal groups (0 and 1), as between subjects variables
# Each subject is measured on two occasions (1 and 2), on easy and hard task (1 and 2)
# This gives 4 scores (easytime1, hardtime1, easytime2, hardtime2) for each subject
# We are just going to generate random numbers - there is no real effect
# We will look for effects of factors group, occasion, and difficulty,
# as well as interactions between these
# Be warned: as this website notes: http://www.statmethods.net/stats/anova.html
# 'If you have been analyzing ANOVA designs in traditional statistical packages,
# you are likely to find R's approach less coherent and user-friendly'
# Our aim here, though, is not to teach you to do ANOVA in R, so much as to
# reveal some of the pitfalls in interpreting multiway ANOVA
#-------------------------------------------------------------------------
myM <- 0 # Mean score for all variables in the sample - we're using z scores for simplicity
mySD <- 1 #
myN <-30 #set sample size per group (You can vary this to see the effect)
n_sims <- 100000 # Specify number of simulated datasets
# We'll start with simulating 20 datasets, but can later update this number
ptable=matrix(rep(NA, (n_sims*10)), nrow=n_sims) #initialising a matrix that will hold p values in each run
table_names <- c("A", "B", "C", "AB", "AC", "BC", "ABC", "anysig", "Bonfsig", "fdr")
## data.frame for Mixed models
mxm_dt <- as.data.frame(matrix(0, ncol = length(table_names) , nrow = n_sims))
names(mxm_dt) <- table_names
# There are 7 p-values: 3 main effects, 3 2-way interactions, and 1 3-way interaction
# The last two columns will denote if any p-value in a run is <.05, or < .007 (Bonferroni corrected)
colnames(ptable) <- table_names
for (i in 1:n_sims){
#----------------------------------------------------------------------------------------
# We'll generate a dataset in 'wide' format, i.e. one row per subject, as this is easy to read
#----------------------------------------------------------------------------------------
mydata <-data.frame(matrix(rnorm(n = myN*2*5, mean = myM, sd = mySD),nrow=myN*2))
mydata[,1]=1 #overwrite column 1 with ones
mydata[1:myN,1]<-0 #overwrite first half of column 1 with zeros
colnames(mydata)<-c('Group','Easy1','Hard1','Easy2','Hard2')
mydata$Group <-as.factor(mydata$Group) #this is important!
# You can look at mydata by clicking on its label in the Environment tab
#----------------------------------------------------------------------------------------
mylongdata <- melt(mydata) #converts to long form, with one column with all scores
mylongdata$Subject <- c(seq(1:(myN*2)),seq(1:(myN*2)),seq(1:(myN*2)),seq(1:(myN*2)))
# Add a column giving subject ID: this is repeated for each combination of conditions
mylongdata$time <-c(rep(1,myN*2*2),rep(2,myN*2*2)) #adds column giving time
mylongdata$difficulty <-c(rep(1,myN*2),rep(2,myN*2),rep(1,myN*2),rep(2,myN*2))#adds column giving difficulty
# Note that there are more general ways of reshaping data: this is just simple for this design
# Look now at longdata by clicking on its name in the Environment tab
#----------------------------------------------------------------------------------------
# Save your simulated data as tab-separated text
#----------------------------------------------------------------------------------------
write.table(mylongdata,
paste("/users/mje/anova_sim/repro17/mixed_model_eeg_simulation/data/sim_data/simulated_data_",
i, ".csv", sep =""),
sep=",")
#----------------------------------------------------------------------------------------
# Now run an ANOVA
#----------------------------------------------------------------------------------------
myaov<-summary(aov(value~(time*difficulty*Group)+Error(Subject/(time*difficulty)),data=mylongdata))
myaovbit<-unlist(myaov$`Error: Within`) # tortuous way to extract the relevant bit of output
ptable[i,1:7] <-myaovbit[33:39] #extract the p-values which happen to be values 33-39 in this output
ptable[i,8:9]<-0 #initialise col 8-9 which will categorise each ANOVA in terms of whether *any* sig effects
sigp<-which(ptable[i,1:7]<.05) #find whether any p-values are < .05
if(length(sigp)>0)
{ptable[i,8]<-1} # if so, assign a 1 to column 8
sigp<-which(ptable[i,1:7]<.007) #now do the same with p < .007, i.e. .05/7
if(length(sigp)>0)
{ptable[i,9]<-1} #result is stored in column 9
# fdr correction
fdr_tmp <- which(p.adjust(ptable[i, 1:7], method = "fdr")<.05) #find whether any p-values are < .05
ifelse(length(fdr_tmp)>0, ptable[i, 10]<-1, ptable[i, 10]<-0)
## Change to factor for the MM
mylongdata$Group <- factor(mylongdata$Group)
mylongdata$time <- factor(mylongdata$time)
mylongdata$Subject <- factor(mylongdata$Subject)
## Fit Mixed model
m1 <- lmer(value ~ -1 + ( 1 | Subject ), data = mylongdata, REML= FALSE)
m2 <- update(m1, .~. + time)
m3 <- update(m2, .~. + difficulty)
m4 <- update(m3, .~. + Group)
m5 <- update(m4, .~. + time:difficulty)
m6 <- update(m5, .~. + time:Group)
m7 <- update(m6, .~. + difficulty:Group)
m8 <- update(m7, .~. + time:difficulty:Group)
mxm_dt[i, 1:7] <- anova(m1, m2, m3, m4, m5, m6, m7, m8)$`Pr(>Chisq)`[2:8]
# fdr correct
if (length(sigp)>0)
{mxm_dt[i,8]<-1} # if so, assign a 1 to column 8
sigp<-which(mxm_dt[i,1:7]<.007) #now do the same with p < .007, i.e. .05/7
if(length(sigp)>0)
{mxm_dt[i,9]<-1} #result is stored in column 9
# fdr correction
fdr_tmp <- which(p.adjust(mxm_dt[i, 1:7], method = "fdr")<.05) #find whether any p-values are < .05
if (length(fdr_tmp)>0)
{mxm_dt[i,8]<-1} # if so, assign a 1 to column 8
} #repeat for next simulation
mypf <- round(ptable,digits=2) #get rid of extraneous digits; This doesn't always work- not sure why!
if (n_sims<21){
grid.table(mypf) #doing gridtable gets v slow with more than 20 rows
}
# For an explanation of the issues raised by this exercise see:
# http://deevybee.blogspot.co.uk/2013/06/interpreting-unexpected-significant.html
write.csv(ptable, "/users/mje/anova_sim/repro17/mixed_model_eeg_simulation/data/anova_dt.csv")
write.csv(mxm_dt, "/users/mje/anova_sim/repro17/mixed_model_eeg_simulation/data/mxm_dt.csv")
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. https://creativecommons.org/licenses/by-nc-sa/4.0/
#
#