This repository has been archived by the owner on Jul 2, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_generate_models.py
165 lines (148 loc) · 3.96 KB
/
_generate_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
"""
A programmatic way of specifying the models and serializing them as JSON
"""
import json
from typing import Dict
from src.conf import Model
# Registry of model specifications, keyed by model name
models: Dict[str, Model] = {}
# Model 1: compare normals vs patients - major categorical factors + sex + age
model_name = "1-general"
categories = [
    "sex",
    "COVID19",
    # "patient" is currently disabled
]
continuous = ["age"]
technical = ["processing_batch_continuous"]
# Covariates are the categorical, continuous and technical variables, in that order
variables = [*categories, *continuous, *technical]
formula = None  # no explicit formula is specified for this model
models[model_name] = model = Model(
    covariates=variables,
    categories=categories,
    continuous=continuous,
    formula=formula,
)
# Model 2: look deep into patients
model_name = "2-covid"
categories = [
    "sex", "race",
    "COVID19", "severity_group",
    "hospitalization", "intubation", "death",
    "diabetes", "obesity", "hypertension",
]
continuous = [
    "age",
    "time_symptoms",
    # "bmi" is currently disabled
]
technical = ["processing_batch_continuous"]
# Covariates are the categorical, continuous and technical variables, in that order
variables = [*categories, *continuous, *technical]
formula = None  # no explicit formula is specified for this model
models[model_name] = model = Model(
    covariates=variables,
    categories=categories,
    continuous=continuous,
    formula=formula,
)
# Model 3: look at changes in treatment
model_name = "3-treatment"
# "severe" is a special entry which simply selects for this group
categories = ["severe", "sex", "race", "tocilizumab"]
continuous = ["age"]
technical = []  # "processing_batch_continuous" is currently disabled for this model
# Covariates are the categorical, continuous and technical variables, in that order
variables = [*categories, *continuous, *technical]
formula = None  # no explicit formula is specified for this model
models[model_name] = model = Model(
    covariates=variables,
    categories=categories,
    continuous=continuous,
    formula=formula,
)
# Model 4+: interactions of sex with other factors
# One model is registered per factor; each one carries an explicit
# "~ sex * <factor>" formula and no continuous or technical covariates.
f = ["severity_group", "death", "hospitalization", "intubation", "tocilizumab"]
for factor in f:
    model_name = f"4-interaction_sex_{factor}"
    categories = [factor, "sex", "race"]
    continuous = []
    technical = []
    variables = [*categories, *continuous, *technical]
    models[model_name] = model = Model(
        covariates=variables,
        categories=categories,
        continuous=continuous,
        formula=f"~ sex * {factor}",
    )
# Model 5a: compare negative vs mild - major categorical factors + sex + age
# NOTE(review): the previous header said "convalescent vs mild", but the
# selector and model name below both say negative vs mild - confirm.
model_name = "5a-negative_mild"
categories = [
    # "negative_mild" is a special entry which simply selects for these groups
    "negative_mild",
    "severity_group",
    "sex",
    # "race" cannot be included here because it is not available in controls
]
continuous = ["age"]
technical = ["processing_batch_continuous"]
# Covariates are the categorical, continuous and technical variables, in that order
variables = [*categories, *continuous, *technical]
formula = None  # no explicit formula is specified for this model
models[model_name] = model = Model(
    covariates=variables,
    categories=categories,
    continuous=continuous,
    formula=formula,
)
# Model 5b: compare convalescent vs mild - major categorical factors + sex + age
model_name = "5b-mild_convalescent"
# "mild_convalescent" is a special entry which simply selects for these groups
categories = ["mild_convalescent", "severity_group", "sex", "race"]
continuous = ["age"]
technical = ["processing_batch_continuous"]
# Covariates are the categorical, continuous and technical variables, in that order
variables = [*categories, *continuous, *technical]
formula = None  # no explicit formula is specified for this model
models[model_name] = model = Model(
    covariates=variables,
    categories=categories,
    continuous=continuous,
    formula=formula,
)
# Model 6: look at temporal changes in patients
# This is just confirmatory of the same in model 2, but this time it
# accounts for treatment ("tocilizumab" is among the categories).
model_name = "6-time_symptoms"
# "mild_severe" is a special entry which simply selects for these groups
categories = ["mild_severe", "sex", "race", "tocilizumab"]
continuous = ["age", "time_symptoms"]
technical = ["processing_batch_continuous"]
# Covariates are the categorical, continuous and technical variables, in that order
variables = [*categories, *continuous, *technical]
formula = None  # no explicit formula is specified for this model
models[model_name] = model = Model(
    covariates=variables,
    categories=categories,
    continuous=continuous,
    formula=formula,
)
# Serialize every registered model specification to a single JSON file.
# The context manager guarantees the handle is flushed and closed even if
# serialization fails part-way (the bare open() call previously leaked it).
# NOTE(review): this relies on Model instances being JSON-serializable
# (e.g. dict-like) - confirm against src.conf.
with open("metadata/model_specifications.json", "w", encoding="utf-8") as handle:
    json.dump(models, handle, indent=4)