-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1_between_ards.R
116 lines (102 loc) · 4.85 KB
/
1_between_ards.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Script to compare the two ARDS etiologies (Co19-ARDS vs. Bact-Seps) within each omics dataset
# Generates supplementary tables 2 and 3 (Excel files written to results/)
### SET UP --------
## clear workspace and set directories ----
# NOTE: this wipes every object in the calling environment and changes the
# working directory for the rest of the session; all relative paths used
# later in the script (input/, results/) are resolved against this directory.
rm(list = ls())
# rstudioapi is used to locate this script's own folder.
# NOTE(review): presumably this requires running from within RStudio - confirm
# before running the script via Rscript.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# create the output folder; spelled-out TRUE/FALSE because T/F are ordinary
# variables that can be reassigned
dir.create("results/", showWarnings = FALSE, recursive = TRUE)
## libraries ----
library(maplet)
library(tidyverse)
source('custom_functions.R')
# input variables ----
# omics datasets to process, in processing order
datasets <- c("plasma_metabo", "plasma_lipids", "plasma_proteo")

# annotation column handed to the pathway-level analysis, keyed by dataset
pwgroups <- list(
  plasma_lipids = "Class",
  plasma_metabo = "SUB_PATHWAY",
  plasma_proteo = "kegg_db"
)

# groups to compare
# each entry of `comps` is one comparison; the elements after the first are
# the group labels and are used to build the comparison name
# (presumably the first element is the sample annotation column - confirm)
complst <- list(
  comps = list(c("Group", "Co19-ARDS", "Bact-Seps")),
  check_groups = c("Bact-Seps", "Co19-ARDS")
)

# adjusted p-value cutoff
pcut <- 0.05
#### MAIN --------
# per-dataset result tables, keyed by dataset name; filled by the loop below
stats_list <- path_list <- list()

### load, prepare, and perform between-ards analysis for each dataset ----
for (dataset in datasets) {
  # intermediate per-dataset Excel outputs; written during the analysis and
  # read back in below to be reformatted
  tmp_stats_file <- sprintf("results/tmp_%s_between_stats.xlsx", dataset)
  tmp_path_file <- sprintf("results/tmp_%s_between_pathstats.xlsx", dataset)

  # load processed data
  D <- mt_load_se_xls(file = paste0("input/", dataset, "_processed.xlsx")) %>%
    # flag that data is logged
    mt_load_flag_logged()

  # get pathway annotations if there is an UniProt column (used for proteomics)
  if ("UniprotID" %in% (D %>% rowData() %>% colnames())) {
    D %<>%
      mt_anno_pathways_uniprot(
        in_col = "UniprotID",
        out_col = "kegg_db"
      ) %>%
      mt_anno_pathways_remove_redundant(
        feat_col = "UniprotID",
        pw_col = "kegg_db"
      ) %>%
      mt_write_pathways(
        pw_col = "kegg_db",
        file = "results/supplementary_table_2_sheet4_proteins_kegg_pathway_annotations.xlsx"
      )
  }

  ## perform analysis per comparison ----
  for (comp in complst$comps) {
    # comparison name = group labels (everything after the first element)
    # joined with "_"
    compnamebase <- paste0(comp[-1], collapse = "_")
    D %<>% between_ards_comparison(
      comp_name = compnamebase,
      comp_info = comp,
      pwgroup = pwgroups[[dataset]],
      p_adj_cut = pcut,
      path_outfile = tmp_path_file
    )
  }
  write_stats(D, out_file = tmp_stats_file)

  ## format the dataset output excel files ----
  # read intermediate outputs
  # NOTE(review): read.xlsx is not attached by this script directly -
  # presumably it comes from openxlsx via maplet or custom_functions.R; confirm
  tmp_stats <- read.xlsx(tmp_stats_file)
  tmp_path <- read.xlsx(tmp_path_file, sheet = "IndividualResults")

  # for each dataset, select desired columns from statistical result and
  # pathway annotation outputs; rename columns from each where necessary
  # (select is namespaced like rename so another attached package cannot
  # mask it)
  if (dataset == "plasma_proteo") {
    tmp_stats %<>%
      dplyr::select(
        name, outcome, estimate, std_error, statistic, fold_change,
        p_value, adj_p, effect_high_in,
        OlinkID, UniprotID, Panel, Panel_Version, kegg_db
      ) %>%
      dplyr::rename(KEGG_Pathway_IDs = kegg_db)
    tmp_path %<>%
      dplyr::select(
        name, pathway, pathway_id, estimate, std.error, statistic,
        fc, p.value, p.adj
      ) %>%
      dplyr::rename(
        KEGG_ID = pathway_id,
        std_error = std.error,
        fold_change = fc,
        p_value = p.value,
        adj_p = p.adj
      )
  } else if (dataset == "plasma_metabo") {
    tmp_stats %<>%
      dplyr::select(
        name, outcome, estimate, std_error, statistic, fold_change,
        p_value, adj_p, effect_high_in,
        SUPER_PATHWAY, SUB_PATHWAY, COMP_ID, PUBCHEM, CAS, KEGG, HMDb
      ) %>%
      dplyr::rename(HMDB = HMDb)
    # the `color` column of the pathway output is exported as SUPER_PATHWAY
    tmp_path %<>%
      dplyr::select(
        name, pathway, color, estimate,
        std.error, statistic, fc, p.value, p.adj
      ) %>%
      dplyr::rename(
        SUB_PATHWAY = pathway,
        SUPER_PATHWAY = color,
        std_error = std.error,
        fold_change = fc,
        p_value = p.value,
        adj_p = p.adj
      )
  } else if (dataset == "plasma_lipids") {
    tmp_stats %<>%
      dplyr::select(
        name, outcome, estimate, std_error, statistic, fold_change,
        p_value, adj_p, effect_high_in,
        Class
      ) %>%
      dplyr::rename(Lipid_Class = Class)
    tmp_path %<>%
      dplyr::select(
        name, pathway, estimate,
        std.error, statistic, fc, p.value, p.adj
      ) %>%
      dplyr::rename(
        Lipid_Class = pathway,
        std_error = std.error,
        fold_change = fc,
        p_value = p.value,
        adj_p = p.adj
      )
  } else {
    # `break` is a control-flow keyword, not an error function: it would leave
    # the loop silently without ever showing a message. Fail loudly instead.
    stop("unidentified dataset: ", dataset, call. = FALSE)
  }

  path_list[[dataset]] <- tmp_path
  stats_list[[dataset]] <- tmp_stats
} # END dataset loop
### label result tables ----
# Map dataset keys to their display names by key rather than by position, so
# that reordering or changing `datasets` cannot silently attach the wrong
# label to a result table.
display_names <- c(
  plasma_metabo = "Metabolomics",
  plasma_lipids = "Lipidomics",
  plasma_proteo = "Proteomics"
)
# both lists are filled in the same loop iteration, so their keys must agree,
# and every key needs a known display name
stopifnot(
  identical(names(stats_list), names(path_list)),
  all(names(stats_list) %in% names(display_names))
)
names(stats_list) <- unname(display_names[names(stats_list)])
names(path_list) <- names(stats_list)

### write supplementary files ----
# NOTE(review): presumably write_dataframes writes one sheet per list element,
# named after the list names - confirm in custom_functions.R
write_dataframes(
  df_list = stats_list,
  out_file = "results/supplementary_table_2_between_ards_stats.xlsx"
)
write_dataframes(
  df_list = path_list,
  out_file = "results/supplementary_table_3_between_ards_pathway_annotations.xlsx"
)

### finished ----
print("Done! Finished per omics comparison of ARDS etiologies!")
print("Generated excel files with supplementary tables 2 and 3 in results folder!")