-
Notifications
You must be signed in to change notification settings - Fork 0
/
initial_benchmark_framework_ukb_genomes.Rmd
114 lines (88 loc) · 2.75 KB
/
initial_benchmark_framework_ukb_genomes.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
---
title: "Initial Benchmark Framework"
output:
  html_notebook:
    toc: true
---
```{r setup}
source("project_settings.R")
source("functions.R")
```
```{r}
# Placeholder value: plot_data is handed to do_plots() later in this notebook
# through its plot_data argument and then overwritten with that call's result.
plot_data <- NA
```
December 28, 2019
https://github.com/large-scale-gxe-methods/gem-workflow/blob/master/gem_workflow.wdl
# Current WDL
Note: on Google Cloud, the requested size of the `local-disk` is ignored. `local-disk` is the best option for I/O-intensive applications.
```
# Task run_tests: writes a GEM parameter file from the inputs, starts dstat
# resource logging in the background, and runs the GEM binary on that file.
task run_tests {
# Inputs. Those declared with "?" are optional; several carry default values.
File genofile
Float? maf = 0.001
File? samplefile
File phenofile
String sample_id_header
String outcome
Boolean binary_outcome
String covar_headers
String int_covar_num
String? delimiter = ","
String? missing = "NaN"
Boolean robust
Int? stream_snps = 20
Float? tol = 0.000001
Int? memory = 10
Int? cpu = 4
Int? disk = 20
# The two Boolean inputs are converted to "1"/"0" strings so they can be
# written into the parameter file (PHENOTYPE and ROBUST entries).
String pheno = if binary_outcome then "1" else "0"
String robust01 = if robust then "1" else "0"
command {
# Write GEM_Input.param: each entry is a key followed by its value.
echo -e "SAMPLE_ID_HEADER\n${sample_id_header}\n"\
"PHENOTYPE\n${pheno}\n"\
"PHENO_HEADER\n${outcome}\n"\
"COVARIATES_HEADERS\n${covar_headers}\n"\
"MISSING\n${missing}\n"\
"ROBUST\n${robust01}\n"\
"STREAM_SNPS\n${stream_snps}\n"\
"NUM_OF_INTER_COVARIATE\n${int_covar_num}\n"\
"LOGISTIC_CONVERG_TOL\n${tol}\n"\
"DELIMINATOR\n${delimiter}\n"\
"GENO_FILE_PATH\n${genofile}\n"\
"PHENO_FILE_PATH\n${phenofile}\n"\
"SAMPLE_FILE_PATH\n${samplefile}\n"\
"OUTPUT_PATH\ngem_res"\
> GEM_Input.param
# Start the resource log with an empty line, then append dstat output
# (cpu, disk and memory stats, 10 second delay between updates) in the
# background while GEM runs.
echo "" > resource_usage.log
dstat -c -d -m --nocolor 10 1>>resource_usage.log &
# Run GEM on the parameter file; the MAF value is passed on the command line.
/GEM/GEM -param GEM_Input.param -maf ${maf}
}
# Runtime attributes: container image plus the memory, cpu and disk inputs.
runtime {
docker: "quay.io/large-scale-gxe-methods/gem-workflow"
memory: "${memory} GB"
cpu: "${cpu}"
disks: "local-disk ${disk} HDD"
}
# Outputs: the parameter file, the GEM result file (named by OUTPUT_PATH
# above) and the dstat resource log.
output {
File param_file = "GEM_Input.param"
File out = "gem_res"
File resource_usage = "resource_usage.log"
}
}
```
# UK Biobank - some chromosomes; HbA1c analysis; dichotomous dPC1 interaction; other covariates
https://app.terra.bio/#workspaces/largescale-gxe-implementation/ukb_gene_diet/job_history/7fc63f9c-393c-41f5-8d13-cf408a340b27
## Stream SNPs: 20
https://job-manager.dsde-prod.broadinstitute.org/jobs/61b4af67-67bd-4099-9d7c-00fdf886e5b5
```{r fig.height=14,fig.width=8,results="asis"}
# Call do_plots() for the stream_snps = 20 run. The current plot_data is passed
# in through the plot_data argument and replaced by the value the call returns.
plot_data <- do_plots(
  path_to_workflow = "gs://fc-secure-006196ce-f92c-4096-80cf-80a7277524dc/7fc63f9c-393c-41f5-8d13-cf408a340b27/run_GEM/61b4af67-67bd-4099-9d7c-00fdf886e5b5/call-run_tests",
  cpus = 8,
  disk = "25G",
  memory = "25G",
  maf = 0.005,
  stream_snps = 20,
  run_tests_runtime = "longest chr: 43h 52m",
  plot_description = "UK Biobank\nlargescale-gxe-implementation/ukb_gene_diet\nSubmitted: Dec 30, 2019, 11:51 PM",
  plot_data = plot_data
)
```
```{r}
# Write the plot_data object to an .RData file (path is relative to the
# current working directory).
save(
  list = "plot_data",
  file = "initial_benchmark_framework_ukb_genomes.RData"
)
```