-
Notifications
You must be signed in to change notification settings - Fork 8
/
model_fullmat.r
52 lines (43 loc) · 1.6 KB
/
model_fullmat.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# point of this file:
# - use the zscores to create a linear model
library(dplyr)
b = import('base', attach_operators=FALSE)
import('base/operators')
io = import('io')
ar = import('array')
st = import('stats')
#' Fits a linear model on Z-scores
#'
#' Fits `zscores ~ 0 + pathway` (no intercept term) via `st$lm` and reshapes
#' the resulting estimates into an array.
#'
#' @param zdata     A list with the zscore matrix (`zscores`) and the index
#'                  object (`index`); the fields `index$sign` and
#'                  `index$pathway` are read here
#' @param hpc_args  Forwarded unchanged to `st$lm` (default: NULL)
#' @return  A list with two elements: `assocs`, the association table with
#'          columns gene, pathway, zscore, p.value and adj.p; and `model`,
#'          the array built from `zscore ~ gene + pathway`
zscore2model = function(zdata, hpc_args=NULL) {
    index = zdata$index
    zscores = t(zdata$zscores) * index$sign

    # fit model to pathway perturbations
    # NOTE(review): `zscores` and `pathway` look like they are picked up by
    # name in st$lm (formula, atomic="pathway", the `zscores` result column
    # and the "pathway" term prefix below) - do not rename these locals
    pathway = t(ar$mask(index$pathway)) + 0 # presumably logical -> numeric; confirm

    # add the MAPK and PI3K rows onto the EGFR row, and the NFkB row onto TNFa
    pathway["EGFR",] = pathway["EGFR",] + pathway["MAPK",] + pathway["PI3K",]
    pathway["TNFa",] = pathway["TNFa",] + pathway["NFkB",]

    mod = st$lm(zscores ~ 0 + pathway, data=index, min_pts=30, atomic="pathway",
                hpc_args=hpc_args) %>%
        transmute(gene = zscores,
                  pathway = sub("^pathway", "", term), # strip the variable-name prefix
                  zscore = estimate,
                  p.value = p.value) %>%
        mutate(adj.p = p.adjust(p.value, method="fdr")) # FDR across all rows of the table

    # only the estimates are returned as an array; p-values stay available
    # in the association table
    zfit = ar$construct(zscore ~ gene + pathway, data=mod)

    list(assocs=mod, model=zfit)
}
## best x-val: 0.15, 1e-4 -> use which?
#zfit[abs(zfit) < 1 | pval > 1e-3 | is.na(zfit)] = 0
#print(colSums(zfit != 0))
if (is.null(module_name())) {
    # only executed when module_name() is NULL
    # (presumably: run as a script rather than imported as a module - confirm)

    # positional command-line arguments, each with a fallback:
    #   [1] input file holding the zscore data, [2] output file for the model
    ZDATA = commandArgs(trailingOnly=TRUE)[1] %or% "../data/zscores.RData"
    OUTFILE = commandArgs(trailingOnly=TRUE)[2] %or% "model_linear.RData"

    # read the zscore data object from disk
    zdata = io$load(ZDATA)

    # fit the model, handing the job settings below through as hpc_args
    result = zscore2model(zdata=zdata, hpc_args=list(n_jobs=10, memory=2048))

    # write the fitted object to OUTFILE under the name "result"
    save(list="result", file=OUTFILE)
}