-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2_serum_association_analysis.R
132 lines (118 loc) · 7.16 KB
/
2_serum_association_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Script to find serum metabolites that changed after the MMKD and AHAD diets.
# Generates Excel files with the association results.
# Clear the workspace, keeping only three objects if they exist
# (presumably path helpers defined by a calling script -- TODO confirm).
rm(list = setdiff(ls(), c("codes.makepath", "data.makepath", "results.makepath")))
# Set the working directory to the folder of this script so that the relative
# paths used below ("custom_functions.R", "results/...") resolve.
# getActiveDocumentContext() only works inside RStudio; when the script is run
# any other way (e.g. Rscript) keep the current directory instead of failing.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
} else {
  message(
    "RStudio is not available; keeping the current working directory: ",
    getwd()
  )
}
# libraries
library(openxlsx) # for excel reading and writing
library(maplet) # MT
library(lme4) # mixed effect models
library(tidyverse) # %>%
library(glue) #> formula
source("custom_functions.R") # customized functions
# Container for the association results; filled in further down with one
# entry per outcome.
res <- list()

# ---- input ----
# Preprocessed serum data read at the start of the analysis.
data_file <- "results/tmp_BEAM_Serum_Nightingale_preprocessed.xlsx"

# ---- outputs ----
# Workbook holding the association results.
serum_associations <- "results/supplementary_table_X_BEAM_Serum_Nightingale_analysis.xlsx"
# Sample sets that entered the AHAD and MMKD analyses, respectively.
serum_aha <- "results/tmp_BEAM_Serum_Nightingale_preprocessed_matched_AHAD.xlsx"
serum_keto <- "results/tmp_BEAM_Serum_Nightingale_preprocessed_matched_MMKD.xlsx"
# Read the preprocessed data set from the Excel input file.
D <- mt_load_se_xls(file = data_file)
# Drop samples lacking an age annotation ...
D <- mt_modify_filter_samples(D, filter = !is.na(Age))
# ... and samples lacking a sex annotation (kept as a separate filter step).
D <- mt_modify_filter_samples(D, filter = !is.na(Sex))
# Derive the sample annotations used by the models below.
D1 <- D %>%
  # Binary diagnosis: NC / NC-SC -> 0, MCI / MCI-A / MCI-MDA -> 1, else NA.
  mt_anno_mutate(
    anno_type = "samples",
    col_name = "diag",
    term = case_when(
      MemDx %in% c("NC", "NC-SC") ~ 0,
      MemDx %in% c("MCI", "MCI-A", "MCI-MDA") ~ 1,
      TRUE ~ NA_real_
    )
  ) %>%
  # Coerce the model covariates to the types the models expect.
  mt_anno_mutate(
    anno_type = "samples", col_name = "diag",
    term = as.factor(as.matrix(diag))
  ) %>%
  mt_anno_mutate(
    anno_type = "samples", col_name = "Age",
    term = as.numeric(as.matrix(Age))
  ) %>%
  mt_anno_mutate(
    anno_type = "samples", col_name = "Sex",
    term = as.factor(as.matrix(Sex))
  ) %>%
  # keto_group: 0 = sample before, 1 = sample after the keto period.
  # The keto period is period A when keto was the first diet, period B
  # otherwise. All other samples get NA.
  mt_anno_mutate(
    anno_type = "samples",
    col_name = "keto_group",
    term = case_when(
      ((first_diet == "keto" & timepoint_NG_serum == "PREA-COG") |
        (first_diet == "aha" & timepoint_NG_serum == "PREB-COG")) ~ 0,
      ((first_diet == "keto" & timepoint_NG_serum == "POSTA-COG") |
        (first_diet == "aha" & timepoint_NG_serum == "POSTB-COG")) ~ 1,
      TRUE ~ NA_real_
    )
  ) %>%
  # aha_group: same coding for the aha period (period A when aha was the
  # first diet, period B otherwise).
  mt_anno_mutate(
    anno_type = "samples",
    col_name = "aha_group",
    term = case_when(
      ((first_diet == "aha" & timepoint_NG_serum == "PREA-COG") |
        (first_diet == "keto" & timepoint_NG_serum == "PREB-COG")) ~ 0,
      ((first_diet == "aha" & timepoint_NG_serum == "POSTA-COG") |
        (first_diet == "keto" & timepoint_NG_serum == "POSTB-COG")) ~ 1,
      TRUE ~ NA_real_
    )
  )
# ---- keto analysis ----
# Keep only the samples taken before/after the keto period.
D11 <- mt_modify_filter_samples(D1, filter = !is.na(keto_group))
# Explicit assignment with %>% is used instead of %<>%, so the script does not
# depend on magrittr's compound-assignment pipe being attached.
D11 <- D11 %>%
  # keep paired samples only: subjects that occur more than once
  mt_modify_filter_samples(
    filter = SubjectID %in% (D11$SubjectID[duplicated(D11$SubjectID)])
  ) %>%
  # remove patients with no diagnosis
  mt_modify_filter_samples(filter = !is.na(diag)) %>%
  # write out the samples analysed
  mt_write_se_xls(file = serum_keto)
# Per-metabolite table from get_diet_fc(), computed on the unscaled data
# (presumably pre/post fold changes -- confirm in custom_functions.R).
tmp <- get_diet_fc(D = D11, id_col = "SubjectID", grp_col = "keto_group")
# scaling
D11 <- D11 %>% mt_pre_trans_scale(center = TRUE)
# Association analysis for the keto outcome, with a diagnosis interaction
# term, Age and Sex as covariates and a random intercept per subject.
res[["keto_group"]] <- association_analysis(
  D = D11, outcome = "keto_group", outcome_type = "twofactor",
  int_w_analyte = "diag",
  conf_formula = "Age + Sex + (1|SubjectID)",
  all_vals = TRUE
)
# Attach the get_diet_fc() columns by metabolite name.
res$keto_group <- left_join(res$keto_group, tmp, by = c("name" = "met"))
# ---- aha analysis ----
# Keep only the samples taken before/after the aha period.
D11 <- mt_modify_filter_samples(D1, filter = !is.na(aha_group))
# Explicit assignment with %>% is used instead of %<>%, so the script does not
# depend on magrittr's compound-assignment pipe being attached.
D11 <- D11 %>%
  # keep paired samples only: subjects that occur more than once
  mt_modify_filter_samples(
    filter = SubjectID %in% (D11$SubjectID[duplicated(D11$SubjectID)])
  ) %>%
  # remove patients with no diagnosis
  mt_modify_filter_samples(filter = !is.na(diag)) %>%
  # write out the samples analysed
  mt_write_se_xls(file = serum_aha)
# Per-metabolite table from get_diet_fc(), computed on the unscaled data
# (presumably pre/post fold changes -- confirm in custom_functions.R).
tmp <- get_diet_fc(D = D11, id_col = "SubjectID", grp_col = "aha_group")
# scaling
D11 <- D11 %>% mt_pre_trans_scale(center = TRUE)
# Association analysis for the aha outcome, with a diagnosis interaction
# term, Age and Sex as covariates and a random intercept per subject.
res[["aha_group"]] <- association_analysis(
  D = D11, outcome = "aha_group", outcome_type = "twofactor",
  int_w_analyte = "diag",
  conf_formula = "Age + Sex + (1|SubjectID)",
  all_vals = TRUE
)
# Attach the get_diet_fc() columns by metabolite name.
res$aha_group <- left_join(res$aha_group, tmp, by = c("name" = "met"))
# ---- write out the results ----
# Worksheet name for each outcome.
sheet_names <- list(keto_group = "keto serum", aha_group = "aha serum")

# Model description stored alongside every result row.
model_formula <- "metabolite ~ prepost + diagnosis + age + sex + prepost:diagnosis + (1|subjectID)"

# Output column (name) <- result column (value), in output order. Keeping old
# and new names side by side avoids renaming by position.
stat_cols <- c("estimate", "std_error", "df", "statistic", "p_value", "adj_p")
lfc_cols <- c("lfc_ci_upper", "lfc_ci_mean", "lfc_ci_lower")
col_map <- c(
  setNames(nm = c("Biomarker_name", "Group", "Subgroup", "name", "model")),
  setNames(stat_cols, paste0("prepost_", stat_cols)),
  setNames(paste0("outcome_", lfc_cols), paste0("prepost_", lfc_cols)),
  setNames(paste0("diag1_", stat_cols), paste0("diagnosis_", stat_cols)),
  setNames(
    paste0("outcome.diag_", stat_cols),
    paste0("prepost:diagnosis_", stat_cols)
  ),
  setNames(paste0("Age_", stat_cols), paste0("age_", stat_cols)),
  setNames(paste0("Sex2_", stat_cols), paste0("sex_", stat_cols))
)

wb <- openxlsx::createWorkbook()
# Cell styles are identical for every sheet, so build them once.
header_style <- openxlsx::createStyle(
  fontName = "Arial", fontSize = 12, halign = "center", valign = "center",
  textDecoration = "bold"
)
body_style <- openxlsx::createStyle(
  fontName = "Arial", fontSize = 12, halign = "center", valign = "center"
)

# loop over outcomes
for (out in names(sheet_names)) {
  sheet <- sheet_names[[out]]
  # results of this outcome, most significant first, restricted to and
  # renamed as the reported columns
  this_res <- res[[out]] %>%
    arrange(adj_p) %>%
    mutate(model = model_formula) %>%
    select(all_of(unname(col_map)))
  names(this_res) <- names(col_map)
  # create worksheet and write data
  openxlsx::addWorksheet(wb, sheet)
  openxlsx::writeData(wb, sheet, this_res, rowNames = FALSE, colNames = TRUE)
  # Body style covers header + data rows; the header style is applied
  # afterwards so that it replaces the body style on row 1.
  openxlsx::addStyle(
    wb, sheet = sheet, body_style,
    rows = seq_len(nrow(this_res) + 1L), cols = seq_len(ncol(this_res)),
    gridExpand = TRUE
  )
  openxlsx::addStyle(
    wb, sheet = sheet, header_style,
    rows = 1, cols = seq_len(ncol(this_res)),
    gridExpand = TRUE
  )
}
# write workbook
openxlsx::saveWorkbook(wb, file = serum_associations, overwrite = TRUE)
## finished -- report completion on the console
for (status_line in c(
  "Done! serum metabolomics analysis with diet-related groups completed.",
  "Generated excel file with supplementary tables in results folder!"
)) {
  print(status_line)
}