-
Notifications
You must be signed in to change notification settings - Fork 0
/
GxE_phenotype_QC.Rmd
141 lines (122 loc) · 4.07 KB
/
GxE_phenotype_QC.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
---
output: html_document
title: Phenotype QC for gene-environment interaction testing
params:
  phenotype_file: "path_to_phenotype_file"
  outcome: "outcome_name"
  exposure: "exposure_name"
  groups: "character_vector_of_group_names"
---
<!--
Run using: rmarkdown::render("GxE_phenotype_QC.Rmd", output_file="myfile.html",
params=list(phenotype_file="my_phenofile.csv", outcome="my_outcome",
exposure="my_exposure", groups=c("my_group1", "my_group2")))
-->
```{r setup, include=FALSE}
# Report-wide chunk defaults: hide code, messages and warnings; draw figures
# as PNG.
knitr::opts_chunk$set(echo=FALSE, message=FALSE, warning=FALSE, dev="png")

# Attach the required packages without their startup messages. The value
# returned by each library() call is collected in `silent` and otherwise unused.
silent <- suppressMessages(
  lapply(
    c("knitr", "kableExtra", "data.table", "tidyverse"),
    function(pkg) library(pkg, character.only=TRUE)
  )
)

# Black-and-white ggplot2 theme for every figure in the report.
theme_set(theme_bw())
```
```{r load-data}
# Resolve the analysis column names first: the missing-value filter below
# needs them.
outcome <- params$outcome
exposure <- params$exposure

# Read the phenotype table, picking the reader from the file extension
# (matched at the end of the path, ignoring case).
pheno_path <- params$phenotype_file
if (grepl("\\.rds$", pheno_path, ignore.case=TRUE)) {  # Binary .rds input file
  phenos <- readRDS(pheno_path)
} else if (grepl("\\.RData$", pheno_path, ignore.case=TRUE)) {  # Binary .RData input file
  # Load into a scratch environment so objects stored in the file cannot
  # overwrite variables used by this report; the first stored object is taken
  # as the phenotype table.
  rdata_env <- new.env()
  rdata_objects <- load(pheno_path, envir=rdata_env)
  phenos <- get(rdata_objects[1], envir=rdata_env)
} else {  # Otherwise, assume a flat file and let fread guess the delimiter
  phenos <- fread(pheno_path, stringsAsFactors=FALSE, data.table=FALSE)
}

# Fail early, with a readable message, when a requested column is absent.
missing_cols <- setdiff(unique(c(outcome, exposure, params$groups)), names(phenos))
if (length(missing_cols) > 0) {
  stop("Column(s) not found in phenotype file: ",
       paste(missing_cols, collapse=", "), call.=FALSE)
}

# Keep only rows where both the outcome and the exposure are observed.
phenos <- filter(phenos, !is.na(.data[[outcome]]), !is.na(.data[[exposure]]))

# A variable with at most two distinct values is treated as binary and stored
# as a factor; the flags are read later when choosing plot types.
bin_outcome <- length(unique(phenos[[outcome]])) <= 2
if (bin_outcome) {
  phenos[[outcome]] <- factor(phenos[[outcome]])
}
bin_exposure <- length(unique(phenos[[exposure]])) <= 2
if (bin_exposure) {
  phenos[[exposure]] <- factor(phenos[[exposure]])
}

phenos <- mutate_at(phenos, vars(all_of(params$groups)), factor)  # All stratification groups as factors
```
```{r define-functions}
# Plot the distribution of a single phenotype column.
#
# pheno: name of the column in `df` to plot (character scalar).
# df:    data frame containing that column.
#
# Returns a ggplot object: a bar chart of per-level counts when the column is
# a factor, otherwise a binned histogram. Rows where `pheno` is NA are dropped.
make_hist <- function(pheno, df) {
  plot_df <- filter(df, !is.na(.data[[pheno]]))
  # Map the column through the .data pronoun instead of the deprecated
  # aes_string(), which parses the name as R code and so breaks on
  # non-syntactic names such as "ldl-c". The axis label is set explicitly so
  # it is always the column name.
  base_plt <- ggplot(plot_df, aes(x=.data[[pheno]])) +
    labs(x=pheno)
  if (is.factor(plot_df[[pheno]])) {
    base_plt + geom_bar()  # Discrete variable: count rows per level
  } else {
    base_plt + geom_histogram()  # Continuous variable: default binning
  }
}
# Plot the outcome against the exposure, choosing the geometry from the types
# of the two variables.
#
# pheno: name of the outcome column in `df` (character scalar).
# xvar:  name of the exposure column in `df` (character scalar).
# df:    data frame containing both columns.
#
# NOTE: the variable types are read from the global flags `bin_outcome` and
# `bin_exposure`, not derived from `df`.
#
# Returns a ggplot object:
#   continuous outcome, continuous exposure -> geom_smooth() of outcome on exposure
#   binary outcome,     continuous exposure -> boxplot of exposure by outcome
#   continuous outcome, binary exposure     -> boxplot of outcome by exposure
#   binary outcome,     binary exposure     -> dodged outcome counts, filled by exposure
#
# Columns are mapped through the .data pronoun (not the deprecated
# aes_string()) so non-syntactic column names work; labels are set explicitly
# to the column names.
make_exposure_outcome_plt <- function(pheno, xvar, df) {
  if (!bin_outcome && !bin_exposure) {
    ggplot(df, aes(x=.data[[xvar]], y=.data[[pheno]])) +
      geom_smooth() +
      labs(x=xvar, y=pheno)
  } else if (bin_outcome && !bin_exposure) {
    ggplot(df, aes(x=.data[[pheno]], y=.data[[xvar]])) +
      geom_boxplot() +
      labs(x=pheno, y=xvar)
  } else if (!bin_outcome && bin_exposure) {
    ggplot(df, aes(x=.data[[xvar]], y=.data[[pheno]])) +
      geom_boxplot() +
      labs(x=xvar, y=pheno)
  } else {
    ggplot(df, aes(x=.data[[pheno]], fill=.data[[xvar]])) +
      geom_bar(position="dodge") +
      labs(x=pheno, fill=xvar)
  }
}
# Plot a phenotype's distribution within each level of a grouping column.
#
# pheno: name of the phenotype column in `df` (character scalar).
# grp:   name of the grouping column in `df` (character scalar).
# df:    data frame containing both columns.
#
# Returns a ggplot object. Rows where `pheno` is NA are dropped first.
#   factor phenotype  -> one bar per group showing the fraction of rows whose
#                        phenotype value equals 1
#   numeric phenotype -> overlaid semi-transparent histograms, one per group
#
# Columns are mapped through the .data pronoun (not the deprecated
# aes_string()) so non-syntactic column names work; labels are set explicitly
# to the column names.
make_grouped_hist <- function(pheno, grp, df) {
  df <- filter(df, !is.na(.data[[pheno]]))
  if (is.factor(df[[pheno]])) {
    # NOTE(review): `== 1` presumably assumes binary phenotypes are coded 0/1;
    # with any other coding the plotted fraction is not meaningful - confirm.
    df %>%
      group_by_at(grp) %>%
      summarise(fraction = sum(.data[[pheno]] == 1) / n()) %>%
      ggplot(aes(x=.data[[grp]], y=fraction)) +
      geom_col(position="dodge", width=0.8) +
      labs(x=grp)
  } else {
    df[[grp]] <- factor(df[[grp]])  # Discrete fill scale: one histogram per group
    ggplot(df, aes(x=.data[[pheno]], fill=.data[[grp]])) +
      geom_histogram(alpha=0.3, position="identity") +
      labs(x=pheno, fill=grp)
  }
}
# Regress one column on another and return the tidied model table.
#
# pheno: name of the response column in `df` (character scalar).
# xvar:  name of the predictor column in `df` (character scalar).
# df:    data frame containing both columns.
#
# Fits glm(pheno ~ xvar) with a binomial family when the response is a factor
# and a gaussian family otherwise, and returns the result of broom::tidy() on
# the fit.
test_assoc <- function(pheno, xvar, df) {
  # Backtick-quote both names so non-syntactic column names (spaces, dashes,
  # leading digits) still form a valid formula; syntactic names are unaffected.
  form <- as.formula(paste0("`", pheno, "` ~ `", xvar, "`"))
  fam <- if (is.factor(df[[pheno]])) "binomial" else "gaussian"
  broom::tidy(glm(form, data=df, family=fam))
}
```
# Outcome & exposure
```{r raw-histograms}
# Build the distribution plot for the outcome and for the exposure, then
# auto-print each one so both figures appear in the report.
outcome_hist <- make_hist(outcome, phenos)
exposure_hist <- make_hist(exposure, phenos)
outcome_hist
exposure_hist
```
```{r exposure-outcome-relationship}
# Plot the outcome against the exposure, then show the tidied glm fit of the
# outcome on the exposure. Both results are auto-printed into the report.
make_exposure_outcome_plt(outcome, exposure, phenos)
test_assoc(outcome, exposure, phenos)
```
# Outcome & strata
```{r group-outcome-relationships}
# For every stratification variable: plot the outcome within each stratum,
# then print the tidied glm fit of the outcome on that variable.
for (stratum in params$groups) {
  stratum_plt <- make_grouped_hist(outcome, stratum, phenos)
  print(stratum_plt)
  stratum_fit <- test_assoc(outcome, stratum, phenos)
  print(stratum_fit)
}
```
# Exposure & strata
```{r group-exposure-relationships}
# For every stratification variable: plot the exposure within each stratum,
# then print the tidied glm fit of the exposure on that variable.
for (stratum in params$groups) {
  stratum_plt <- make_grouped_hist(exposure, stratum, phenos)
  print(stratum_plt)
  stratum_fit <- test_assoc(exposure, stratum, phenos)
  print(stratum_fit)
}
```
--------------------------------------------------------------------------------
```{r dump-parameters}
# Record the parameters this report was rendered with. Each parameter value
# (possibly a vector) is collapsed into one comma-separated string.
param_table <- tibble(
  Parameter=names(params),
  Value=vapply(params, paste, character(1), collapse=", ")
)
param_table %>%
  kable(booktabs=TRUE, caption="QC report parameters") %>%
  kable_styling(full_width=FALSE)
```