-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPractical_sol.R
141 lines (103 loc) · 4.02 KB
/
Practical_sol.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
##-- NOTE: the global workspace is intentionally NOT wiped with rm(list = ls()).
##-- Every object used below is assigned before it is read, so the script does not
##-- need an empty workspace; for a clean run, start it in a fresh R session.
##-- Method-of-moments parameters of a log-normal distribution.
##-- Given the mean and the variance on the natural scale, returns the pair
##-- c(meanlog, sdlog) in the parameterisation used by dlnorm()/rlnorm():
##--   meanlog = log(m^4 / (v + m^2)) / 2
##--   sdlog   = sqrt(log(v / m^2 + 1))
# m: mean of the log-normal variable
# v: variance of the log-normal variable
param.log.normal <- function(m, v) {
  raw.second.moment <- v + m^2                 # E[Y^2] = V[Y] + E[Y]^2
  meanlog <- log(m^4 / raw.second.moment) / 2  # location on the log scale
  sdlog <- sqrt(log(v / m^2 + 1))              # scale on the log scale
  c(meanlog, sdlog)
}
##-- Parameters for log-Normal function used in the script
##-- (v = 4^2: variance 16, i.e. standard deviation 4, for both groups)
par1 <- param.log.normal(m = 1, v = 4^2)  # c(meanlog, sdlog) for the group with mean 1
par2 <- param.log.normal(m = 2, v = 4^2)  # c(meanlog, sdlog) for the group with mean 2
# Are they skewed? Draw each density on [0, 8] (each call opens a new plot)
curve(dlnorm(x, par1[1], par1[2]), 0, 8, col = 2, lwd = 2)
# meanlog must be par2[1] and sdlog par2[2], the same argument order as for par1
curve(dlnorm(x, par2[1], par2[2]), 0, 8, col = 2, lwd = 2)
#-------------------------------------
#
# Simple example with separate stages
#
#-------------------------------------
######################################
# A: Aims
######################################
# Assessing the performance of the two-sample t test
# with skewed data and unbalanced groups
######################################
# D: Data generating mechanism
######################################
##-- Parameters
N <- 30       # Total sample size of each simulated dataset (both groups together)
Nsim <- 1000  # Number of simulated datasets
##-- Set a seed for reproducibility
set.seed(2008)
##-- Factor with 2 categories, drawn for all N*Nsim observations at once
# P(X=1) = 2/3 & P(X=2) = 1/3 --> unbalanced groups
x <- sample(1:2, N * Nsim, replace = TRUE, prob = c(2/3, 1/3))
##-- Response
y <- numeric(N * Nsim)
# Alternative scenario with unit variances (kept for reference):
# y[x==1] <- round(rlnorm(sum(x==1),meanlog = -log(2)/2, sdlog = sqrt(log(2))),3) # E[Y|X=1] = 1 & V[Y|X=1] = 1
# y[x==2] <- round(rlnorm(sum(x==2),meanlog = log(16/5)/2, sdlog = sqrt(log(5/4))),3) # E[Y|X=2] = 2 & V[Y|X=2] = 1
# par1/par2 are computed with v = 4^2, so the SD is 4 and the variance is 16.
# Draw order (group 1 first, then group 2) is part of the seeded random stream.
y[x == 1] <- round(rlnorm(sum(x == 1), meanlog = par1[1], sdlog = par1[2]), 3) # E[Y|X=1] = 1 & V[Y|X=1] = 16
y[x == 2] <- round(rlnorm(sum(x == 2), meanlog = par2[1], sdlog = par2[2]), 3) # E[Y|X=2] = 2 & V[Y|X=2] = 16
##-- Store in a matrix: N rows, one column per simulated dataset
X <- matrix(x, ncol = Nsim)
Y <- matrix(y, ncol = Nsim)
######################################
# M: T-test
######################################
##-- One two-sample t test with pooled variance (var.equal = TRUE) per simulated
##-- dataset, i.e. per column i of Y (response) and X (group).
##-- lapply() returns the complete list at once instead of growing it inside a
##-- loop, and seq_len() stays well-defined even if Nsim were 0.
TTEST <- lapply(seq_len(Nsim), function(i) t.test(Y[, i] ~ X[, i], var.equal = TRUE))
######################################
# E: Estimands --> IC95 (confidence interval of the mean difference)
######################################
##-- Nsim x 2 matrix: one row per simulated dataset, lower (LL) and upper (UL) limit.
##-- vapply() checks that every test really yields two numeric limits; it returns
##-- them as columns (2 x Nsim), hence the transpose.
IC95 <- t(vapply(
  seq_len(Nsim),
  function(i) as.numeric(TTEST[[i]]$conf.int),  # as.numeric() drops the "conf.level" attribute
  numeric(2)
))
colnames(IC95) <- c('LL', 'UL')
######################################
# P: Performance --> Coverage
######################################
MD <- -1 # True Mean Difference [MD = E(Y|X=1) - E(Y|X=2) = -1]
##-- TRUE when the interval of a simulated dataset strictly contains the true MD.
##-- The comparison is vectorised over all rows of IC95, so no loop is needed.
included <- (MD > IC95[, 'LL'] & MD < IC95[, 'UL'])
##-- Point estimate of the coverage
# The mean of a logical vector is the proportion of TRUE values. Unlike
# prop.table(table(included))[2], it is also defined (0 or 1) when all
# intervals miss or all intervals cover the true value.
point.coverage <- mean(included)
point.coverage
##-- CI95% of the coverage
prop.test(x = sum(included), n = length(included))
#-------------------------------------
#
# Example using the MonteCarlo package
#
#-------------------------------------
library(MonteCarlo)
##-- T.test function: one simulated dataset, one coverage indicator
# N: Total sample size
# SD: Standard deviation for each sample
# P: Proportion assigned to arm 2
# Returns list(Coverage = TRUE/FALSE): TRUE when the confidence interval of the
# pooled-variance t test strictly contains the true mean difference (-1)
ttest <- function(N, SD, P) {
  ##-- Log-normal parameters: means 1 and 2, common variance SD^2
  lnorm.arm1 <- param.log.normal(m = 1, v = SD^2)
  lnorm.arm2 <- param.log.normal(m = 2, v = SD^2)

  ##-- Generate data (draw order: arm labels, then arm 1, then arm 2)
  arm <- sample(1:2, N, replace = TRUE, prob = c(1 - P, P))
  in.arm1 <- arm == 1
  in.arm2 <- arm == 2
  response <- numeric(N)
  response[in.arm1] <- round(
    rlnorm(sum(in.arm1), meanlog = lnorm.arm1[1], sdlog = lnorm.arm1[2]),
    3
  )
  response[in.arm2] <- round(
    rlnorm(sum(in.arm2), meanlog = lnorm.arm2[1], sdlog = lnorm.arm2[2]),
    3
  )

  ##-- Confidence interval of the mean difference (arm 1 minus arm 2)
  true.diff <- (-1)
  limits <- t.test(response ~ arm, var.equal = TRUE)$conf.int
  covered <- true.diff > limits[1] & true.diff < limits[2]

  # return result as a named list:
  return(list("Coverage" = covered))
}
######################################
# AIO: All In ONE
######################################
##-- Parameters grid
# One vector of candidate values for each argument of ttest():
# N = total sample size, SD = standard deviation, P = proportion assigned to arm 2
N_grid <- c(30,50,100)
SD_grid <- 1:5
P_grid <- c(0.3,0.5)
# The list names ("N", "SD", "P") are the argument names of ttest()
param_list <- list("N"=N_grid,"SD"=SD_grid,"P"=P_grid)
##-- Run simulation
# Seed set right before the run for reproducibility
set.seed(2008)
# nrep=1000 repetitions of ttest() -- presumably for every combination of the
# grid values (TODO confirm against the MonteCarlo package documentation)
MC_res <- MonteCarlo(func=ttest, nrep=1000, param_list=param_list)
# Tabulate the results: SD and N in rows, P in columns, 2 digits
MakeTable(output=MC_res, rows=c('SD','N'), cols='P', digits=2, include_meta=FALSE)