-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathworked_example_sw.R
163 lines (135 loc) · 5.21 KB
/
worked_example_sw.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
library(tidyverse)
# Set the working directory and load the worked-example data.
# NOTE(review): both paths are absolute and specific to one user's machine, so
# the script will not run elsewhere without editing them. load() is given the
# full path, so it does not depend on the setwd() call.
# load() restores the objects saved in sample_data.RData into the workspace;
# the code below uses `one_metric_example_data`, presumably one of them --
# TODO confirm.
setwd("C:/Users/steven.wyatt/OneDrive - Midlands and Lancashire CSU/work/jg iod ci code")
load("C:/Users/steven.wyatt/OneDrive - Midlands and Lancashire CSU/work/jg iod ci code/sample_data.RData")
# Steven's version ----
# Relative index of disparity (IoD) for the example data, plus a
# simulation-based 95% interval.
#
# Each row of `one_metric_example_data` is one group with a count
# (`numerator`) and a population (`denominator`). Standard errors take the
# sqrt(count) / population form.
sw_df <- one_metric_example_data |>
  dplyr::select(numerator, denominator) |>
  mutate(
    rate = numerator / denominator,
    se_rate = sqrt(numerator) / denominator,
    global_rate = sum(numerator) / sum(denominator),
    # Same form as se_rate, applied to the pooled totals. The square root
    # belongs on the pooled count only, not on the pooled rate.
    se_global_rate = sqrt(sum(numerator)) / sum(denominator),
    rel_iod = sum(abs(rate - global_rate) * denominator) / (2 * sum(numerator))
  )

set.seed(1703)
n_sims <- 1000L
sw_sim <- ungroup(sw_df)

# One relative IoD per simulation. Each simulation draws one rate per group
# from Normal(rate, se_rate); the number of draws follows the number of rows
# instead of being fixed at 5.
# NOTE(review): every draw is compared with the observed global_rate, not a
# global rate recomputed from the sampled numerators -- confirm this is
# intended.
iod_samples <- vapply(
  seq_len(n_sims),
  function(i) {
    sample_rate <- rnorm(nrow(sw_sim), sw_sim$rate, sw_sim$se_rate)
    sample_numerator <- sample_rate * sw_sim$denominator
    sum(abs(sample_rate - sw_sim$global_rate) * sw_sim$denominator) /
      (2 * sum(sample_numerator))
  },
  numeric(1)
)

# 2.5th and 97.5th percentiles of the simulated IoD values.
iod_ci95 <- quantile(iod_samples, c(0.025, 0.975))
iod_ci95
#' Relative index of disparity for one metric
#'
#' For each row (group) the rate is `numerator / denominator`; the global rate
#' is `sum(numerator) / sum(denominator)`. The result is
#' `sum(|rate - global_rate| * denominator) / (2 * sum(numerator))`.
#'
#' @param one_metric_data A data frame with numeric columns `numerator` and
#'   `denominator`, one row per group.
#' @return A single numeric value, the relative index of disparity. `NaN` when
#'   the numerators sum to zero (including zero-row input).
compute_iod <- function(one_metric_data) {
  stopifnot(
    is.data.frame(one_metric_data),
    all(c("numerator", "denominator") %in% names(one_metric_data))
  )

  # Work from the argument, not from any object of a similar name in the
  # calling environment.
  numerator <- one_metric_data[["numerator"]]
  denominator <- one_metric_data[["denominator"]]

  rate <- numerator / denominator
  global_rate <- sum(numerator) / sum(denominator)
  abs_diff <- abs((rate - global_rate) * denominator)

  sum(abs_diff) / (2 * sum(numerator))
}
# Point estimate of the relative index of disparity for the example data.
iod <- compute_iod(one_metric_example_data)
# Jacqueline's version
# one_metric_example_data <- tar_read(activity_by_type_clusters_stg6) |>
# as_tibble()|>
# select(cluster2,metric1_total,starts_with("metric29b"))|>
# rename(denominator=metric1_total,numerator=metric29b_total)
# Function to compute index of disparity
#' Relative index of disparity for one metric
#'
#' Computes `sum(|rate - global_rate| * denominator) / (2 * sum(numerator))`,
#' where `rate = numerator / denominator` per row and
#' `global_rate = sum(numerator) / sum(denominator)`.
#'
#' @param one_metric_example_data A data frame with numeric columns
#'   `numerator` and `denominator`, one row per group.
#' @return A single numeric value, the relative index of disparity. `NaN` when
#'   the numerators sum to zero (including zero-row input).
compute_id <- function(one_metric_example_data) {
  stopifnot(
    is.data.frame(one_metric_example_data),
    all(c("numerator", "denominator") %in% names(one_metric_example_data))
  )

  numerator <- one_metric_example_data[["numerator"]]
  denominator <- one_metric_example_data[["denominator"]]

  # Only the quantities that feed the returned value are computed; confidence
  # limits and standard errors play no part in the index itself.
  rate <- numerator / denominator
  global_rate <- sum(numerator) / sum(denominator)
  abs_diff <- abs((rate - global_rate) * denominator)

  sum(abs_diff) / (2 * sum(numerator))
}
# Simulation-based interval for the relative index of disparity.
#
# For every cluster (row of the example data), draw `n_sims` rates from
# Normal(rate, se), turn them back into simulated numerators, and recompute
# the index of disparity for each simulation using a global rate rebuilt from
# the simulated numerators. Works for any number of clusters.
# No seed is set here, so the draws depend on the RNG state left by whatever
# ran before this section.
data <- one_metric_example_data
n_clusters <- nrow(data)
n_sims <- 1000L

cluster_denom <- data$denominator
cluster_rate <- data$numerator / cluster_denom
# Algebraically equal to (upper_ci - rate) / 1.96 with
# upper_ci = (numerator + 1.96 * sqrt(numerator)) / denominator.
cluster_se <- sqrt(data$numerator) / cluster_denom

# n_sims x n_clusters matrix: one column per cluster, one row per simulation.
# Drawing cluster by cluster keeps the random-number stream in the same order
# as a loop over clusters.
sim_rates <- vapply(
  seq_len(n_clusters),
  function(k) rnorm(n_sims, cluster_rate[k], cluster_se[k]),
  numeric(n_sims)
)
colnames(sim_rates) <- paste0("rates", seq_len(n_clusters))

# Simulated numerators: each cluster's column scaled by its denominator.
sim_num <- sweep(sim_rates, 2, cluster_denom, `*`)

# Per-simulation global rate; the vector recycles down each column, so every
# row is compared with its own simulation's global rate.
sim_global_rate <- rowSums(sim_num) / sum(cluster_denom)
sim_abs_diff <- abs(sweep(sim_rates - sim_global_rate, 2, cluster_denom, `*`))

# One row per simulation: the simulated rates plus the per-simulation summary.
rate_clusters <- as.data.frame(sim_rates)
rate_clusters$global_rate <- sim_global_rate
rate_clusters$iod_abs <- rowSums(sim_abs_diff) / 2
rate_clusters$iod_rel <- rowSums(sim_abs_diff) / (2 * rowSums(sim_num))

# Point estimate, then the 2.5th and 97.5th percentiles of the simulated values.
compute_id(one_metric_example_data)
quantile(rate_clusters$iod_rel, c(.025, .975))
## this gives the 95% CIs (the 2.5th and 97.5th percentiles)
## if you want the 90% CIs then this would be c(0.05, 0.95)