-
Notifications
You must be signed in to change notification settings - Fork 8
/
model_onextalk.r
59 lines (48 loc) · 1.94 KB
/
model_onextalk.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# point of this file:
# - use the zscores to create a linear model
library(dplyr)
b = import('base')
io = import('io')
#' Fits a linear model on Z-scores
#'
#' @param zdata A list with the zscore matrix and index object
#' @return The coefficients matrix [gene x pathway]
zscore2model = function(zdata, hpc_args=NULL) {
b = import('base')
ar = import('array')
df = import('data_frame')
st = import('stats')
index = zdata$index
zscores = t(zdata$zscores)
# create indicator matrix which pathway is activated/inhibited
pathway = index$sign * (ar$mask(index$pathway) + 0)
pathway[,"MAPK"] = pathway[,"MAPK"] + pathway[,"EGFR"]
pathway[,"PI3K"] = pathway[,"PI3K"] + pathway[,"EGFR"]
pathway[,"NFkB"] = pathway[,"NFkB"] + pathway[,"TNFa"]
# add EGFR>PI3K link here?
# if using intercept (does this make a difference?)
# pathway = cbind('(Intercept)'=1, pathway)
mod = st$lm(zscores ~ 0 + pathway, min_pts=30, atomic="pathway",
hpc_args=hpc_args) %>%
transmute(gene = zscores,
pathway = sub("^pathway", "", term),
zscore = estimate,
p.value = p.value) %>%
filter(pathway != "(Intercept)") %>%
mutate(adj.p = p.adjust(p.value, method="fdr"))
zfit = ar$construct(zscore ~ gene + pathway, data=mod, fill=0)
pval = ar$construct(p.value ~ gene + pathway, data=mod, fill=1)
# filter zfit to only include top 100 genes per pathway
model = zfit
model[apply(pval, 2, function(p) !b$min_mask(p, 100))] = 0
list(assocs=mod, model=model)
}
if (is.null(module_name())) {
ZDATA = commandArgs(TRUE)[1] %or% "../data/zscores.RData"
OUTFILE = commandArgs(TRUE)[2] %or% "model_matrix.RData"
# load speed data, index; filter for train set only
zdata = io$load(ZDATA)
result = zscore2model(zdata, hpc_args=list(n_jobs=20, memory=2048))
# save resulting object
save(result, file=OUTFILE)
}