-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsimulatuion_errorI.R
138 lines (115 loc) · 5.23 KB
/
simulatuion_errorI.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Simulations for type 1 error.
#We will permute the spiders data at random [I used method A, Permute data in individual rows, method B, Permute data in individual columns], producing each time a data matrix in which H0 is certainly true.
#We will then run function dbd.perm2() and obtain the associated p-value.
#We will repeat this simulation a large number of times and obtain an estimate of the proportion of the simulations where H0 was rejected at a variety of alpha rejection levels {0.01, 0.05, 0.10, 0.20}.
#“A statistical testing procedure is valid if the probability of a type I error (rejecting H0 when true) is no greater than α, the level of significance, for any α.” (Edgington 1995, p. 37).
simul.type1.dbd = function(nsim=100, n=28, p=12, g=NULL, pg = 6, gen.method=1, prob.mat1=0.8, sd1=1.5,perm.method=2)
{
# Generate matrix mat1 (site x species)
if(gen.method==1) { # Random Poisson data
mat1 <- matrix(rpois((n*p),prob.mat1),n,p)
} else { # Random lognormal data
mat1 <- matrix(round(exp(rnorm((n*p),0,sd1))),n,p)
}
out = matrix(NA, nsim, 3)
rownames(out) = paste("Simul",1:nsim,sep=".")
colnames(out) = c("cor.sg", "cor.t", "p.val")
aa <- system.time({
for(i in 1:nsim) {
if(perm.method==1) { # Permute p/a data in individual rows of mat1 (sites x species), method A
for(ii in 1:n) mat1.H0[ii,] = sample(mat1[ii,])
}
if(perm.method==2) {
# Permute data in individual columns of mat1 (sites x species) # method B
mat1.H0 = apply(mat1,2,sample)
}
tmp = dbd.perm2(mat1.H0, g, perm.method=perm.method, nperm=999, clock=FALSE)
out[i,] = c(tmp$Stat.out[1,1:2], tmp$p.val)
}
})
aa[3] <- sprintf("%2f", aa[3])
cat("Time for computation =", aa[3], " sec\n")
#
data.frame(out)
}
dbd.perm2 <- function(mat, g, perm.method=1, nperm=999, nn.print=0,clock=TRUE)
{
epsilon <- sqrt(.Machine$double.eps)
aa <- system.time({
mat = as.matrix(mat)
n <- nrow(mat)
p <- ncol(mat)
# Create matrix mat1 (sites x species), p/a data
mat1=.Call("presence_absence",mat)
rich.sp=.Call("richesse",mat1)
# Create matrix A of dummy variables (species x genera)
g = as.factor(g)
ng = length(g)
A = model.matrix(~g-1)
A = A[1:ng,]
colnames(A) = levels(g)
# Create matrix mat2 (sites x genera), p/a data
mat2=.Call("produit",mat,A)
mat2=.Call("presence_absence",mat2)
rich.gen=.Call("richesse",mat2)
Nsp.by.Ngen = rich.sp/rich.gen
Richness = as.data.frame(cbind(rich.gen,rich.sp, Nsp.by.Ngen))
Richness=Richness[Richness$rich.gen !=0,]
Richness.emp=Richness
Stat.out = matrix(NA,nperm+1,2)
colnames(Stat.out) = c("cor.sg", "cor.t")
rownames(Stat.out) = paste("perm", 0:nperm, sep=".")
# Compute statistics for unpermuted data and write them to Stat.out
Stat.out[1,1] <- cor.sg <- cor(Richness$rich.gen, Richness$Nsp.by.Ngen)
Stat.out[1,2] <- cor.t <- cor.sg * sqrt(n-2)/sqrt(1-cor.sg^2)
if(cor.sg > 1.0-sqrt(epsilon)) cat("cor = 1.0; a test is useless.\n")
#
# Permutations begin
if((perm.method<1) | (perm.method>3)) stop ("Method not implemented")
for(i in 1:(nperm)) {
mat1.perm = matrix(0,n,p)
### this permutation was dropped from the paper
# if(perm.method==1) { # Permute p/a data in in the whole matrix mat1 (sites x species)
# mat1.perm=.Call("sampleC_all",mat1)
# }
## this is model A in the paper
if(perm.method==1) { # Permute p/a data in individual rows of mat1 (sites x species)
mat1.perm=.Call("sampleC_row_real",mat1)
}
##this is model B in the paper
if(perm.method==2) { # Permute data in individual columns of mat1 (sites x species)
mat1.perm = .Call("sampleC_col_real",mat1)
}
mat1.perm=.Call("presence_absence",mat1.perm)
# Create matrix "mat2" of sites x genera
mat2=.Call("produit",mat1.perm,A) ##produit matriciel
mat2=.Call("presence_absence",mat2)
rich.gen=.Call("richesse",mat2)
length(rich.gen[rich.gen==0])
rich.sp=.Call("richesse",mat1.perm)
Nsp.by.Ngen.perm = rich.sp/rich.gen
Richness = as.data.frame(cbind(rich.gen, Nsp.by.Ngen.perm))
Richness=Richness[Richness$rich.gen !=0,]
# Compute statistics
Stat.out[i+1,1] <- cor.sg.perm <- cor(Richness$rich.gen, Richness$Nsp.by.Ngen.perm)
Stat.out[i+1,2] <- cor.t.perm <- cor.sg.perm * sqrt(n-2)/sqrt(1-cor.sg.perm^2)
}
#
# Print a few lines (nn lines) of the output matrix
if(nn.print>0) print(Stat.out[1:nn.print,])
})
aa[3] <- sprintf("%2f", aa[3])
if (clock) cat("Time for computation =", aa[3], " sec\n")
# Compute p-value
p.val = length(which(Stat.out[,2] >= Stat.out[1,2]))/(nperm+1)
list(Richness=Richness.emp, Stat.out=Stat.out, p.val=p.val,Richness.perm=Richness)
}
genera = c("Alop", "Alop", "Alop", "Arct", "Arct", "Aulo", "Pard", "Pard", "Pard", "Pard", "Troc", "Zora")
res1 = simul.type1.dbd(nsim=100, g=genera, gen.method=1,perm.method=1)
head(res1)
res1$p.val
length(which(res1$p.val <= 0.05))
length(which(res1$p.val <= 0.10))
length(which(res1$p.val <= 0.20))
length(which(res1$p.val <= 0.50))
mean(res1$p.val)