-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcellnetr_apply.R
268 lines (242 loc) · 7.19 KB
/
cellnetr_apply.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
cn_classify<-function
### run binary classifiers on expression data
(classList,
### result of running CN3_makeClassifier
expDat,
### expression data
cttComms
### list of tct->list(grn)
){
ctts<-names(classList);
ans<-matrix(0, nrow=length(ctts), ncol=ncol(expDat));
rownames(ans)<-ctts;
for(ctt in ctts){
#cat(ctt,"\n")
## xgenes<-cttComms[[ctt]][[1]];
xgenes<-cttComms[[ctt]];
myProbs<-predict(classList[[ctt]], t(expDat[xgenes,]), type='prob');
ans[ctt,]<-myProbs[,ctt];
}
colnames(ans)<-colnames(expDat);
ans;
# classification matrix
}
cn_apply<-function
### run CellNet on expQuery
(expQuery,
### expression matrix
stQuery,
### sample table
cnProc,
### cnProc object
dLevelQuery="description1"
### stQuery column name
){
# 08-07-14, set default to sample_name so that individual samples are plotted
if(length(dLevelQuery)==0 ){
dLevelQuery<-"sample_name";
}
else{
xx<-which(colnames(stQuery)==dLevelQuery);
if(length(xx)==0){
dLevelQuery<-"sample_name";
}
}
ctrlScores<-cnProc[['trainingScores']]; # subnet est scores in control -- normalized, df
normVals<-cnProc[['normVals']]; # average subnet est scores in control samples, (list of ctt->subnet ave)
minVals<-cnProc[['minVals']]; # min raw vals of grn establishments to shift by
rawCtrlScores<-cnProc[['raw_scores']];
tVals<-cnProc[['tVals']];
subNets<-cnProc[['grnList']];
classList<-cnProc[['classList']];
classWeight<-cnProc[['classWeight']];
exprWeight<-cnProc[['exprWeight']];
# score the query data
#cat("Scoring query data...\n")
scoresQuery<-cn_score(expQuery, subNets, tVals, minVals=minVals, classList=classList, classWeight=classWeight, exprWeight=exprWeight);
# normalize query scores
#cat("normalizing grn scores\n");
normScoresQuery<-cn_normalizeScores(normVals, scoresQuery, rownames(scoresQuery));
# classify the query data
#cat("Classifying query data ...\n");
myClass<-cn_classify(classList,expQuery,subNets);
colnames(myClass)<-as.vector(stQuery[,dLevelQuery])
# it's really useful to have the averaged training data, too
meanExpQuery<-as.matrix(GEP_makeMean(expQuery,as.vector(stQuery[,dLevelQuery]),type='mean'));
ans<-list(expQuery=expQuery,
stQuery=stQuery,
dLevelQuery=dLevelQuery,
queryScores=scoresQuery,
classRes=myClass,
normScoresQuery=normScoresQuery,
expMeanQuery=meanExpQuery);
class(ans)<-"cnRes";
ans;
# cnRes object
}
cn_normalizeScores<-function
### divide the query scores by the mean values in the training data.
(ctrlScores,
### a list of subnet->mean value, all subnets
queryScores,
### a matrix, rownames = subnet names, all subnets
subNets
### a vector of subnets names to use
){
ans<-matrix(0, nrow=length(subNets), ncol=ncol(queryScores));
rownames(ans)<-subNets
#subNets<-rownames(queryScores);
for(subNet in subNets){
cat(subNet,"\n")
ans[subNet,]<- queryScores[subNet,] / ctrlScores[[subNet]];
}
colnames(ans)<-colnames(queryScores);
ans;
### normalized expression matrix
}
cn_netScores<-function
### return the GRN establishment score for a given expression matrix
(expDat,
### expression matrix
genes,
### vector of genes in the GRN
tVals,
### tVals list of ctt->list(means->genes, sds->genes)
ctt,
### ctt as basis of reference
classList,
### list of classifiers to extract weights
classWeight=TRUE,
### whether to weight by classifier or Not
exprWeight=TRUE
### whether to weight by gene expression
){
cat("genes:",length(genes),"\n");
cat("classifier:", nrow(classList[[ctt]]$importance),"\n");
aMat<-matrix(0, nrow=length(genes), ncol=ncol(expDat));
rownames(aMat)<-genes;
weights<-rep(1, length(genes));
names(weights)<-genes;
if(exprWeight){
cat("expr weights\t");
meanVect<-unlist(tVals[[ctt]][['mean']][genes]);
weights<-(2**meanVect)/sum(2**meanVect);
}
if(classWeight){
cat("class weights\n")
classImp<-classList[[ctt]]$importance[genes,1];
weights<-weights*classImp;
}
for(gene in genes){
zzs<-cn_rawScore(expDat[gene,], tVals[[ctt]][['mean']][[gene]], tVals[[ctt]][['sd']][[gene]]);
aMat[gene,]<-zzs;
}
xscores<-apply(aMat, 2, weighted.mean, w=weights);
xscores;
# grn scores (normalized)
}
cn_rawScore<-function
### computes the raw score for a gene as xmax-abs(zscore). this makes better values higher.
(vect,
### a vector
mmean,
### mean value in training data
ssd
### standard deviation in training data
){
zcs<-zscore(vect, mmean, ssd);
xmax<-1000; # arbitrary, and corrected for later, but want some high enough that it should not be exceeded
xmax-abs(zcs);
# transformed (but not normalized) GRN score
}
cn_score<-function
### runs cn_netScores on each GRN
(expDat,
### expression matrix
subList,
### list of tct => genes
tVals,
### a
classList,
###
minVals=NULL,
### vector of subnet->minVal, # for shifting raw grn establishment values
classWeight=TRUE,
### class weights?
exprWeight=TRUE
### expression weights
){
#nSubnets<-sum(sapply(subList, length));
nSubnets<-length(subList);
ans<-matrix(0, nrow=nSubnets, ncol=ncol(expDat));
ctts<-names(subList);
rnames<-vector();
rIndex<-1;
for(ctt in ctts){
# cat(ctt,"\n");
genes<-subList[[ctt]];
# snNames<-names(subnets);
# rnames<-append(rnames, snNames);
# for(sName in snNames){
ans[rIndex,]<-cn_netScores(expDat, genes, tVals=tVals, ctt=ctt,classList, classWeight=classWeight,exprWeight=exprWeight);
rnames<-append(rnames, ctt);
rIndex<-rIndex+1;
# }
}
rownames(ans)<-rnames;
colnames(ans)<-colnames(expDat);
if(!is.null(minVals)){
minVals<-minVals[rownames(ans)];
ans<-ans-minVals;
}
ans;
### GRN scores
}
cn_outputRes<-function
### write out grn scores, classification scores, normalized data to csv, then tar and zip
(cnRes,
### cnRes object
tfScores,
### NIS
prefix
### filename prefix
){
# write out classRes
classRes<-cnRes[['classRes']];
fname1<-paste(prefix, "_classRes.csv", sep='');
write.csv(classRes, file=fname1);
# write out normalized GRN statuses
grnScores<-cnRes[['normScoresQuery']];
fname2<-paste(prefix, "_grnScores.csv", sep='');
write.csv(grnScores, file=fname2);
# write out normalized query expression data
expDat<-cnRes[['expQuery']];
fname3<-paste(prefix, "_expNorm.csv", sep='');
write.csv(expDat, file=fname3);
# write out one data file of TF scores
tfStab<-.cn_makeTFtable(tfScores);
fname4<-paste(prefix, "_NIS.csv", sep='');
write.csv(tfStab, file=fname4, row.names=F);
fnameOut<-paste(prefix,"_data.tar", sep='')
cmd<-paste("tar -c ", fname1, " ",fname2," ", fname3," > ", fnameOut, sep='');
system(cmd);
cmd<-paste("gzip -f ", fnameOut, sep='');
system(cmd);
fnameOut;
### name of output file
}
.cn_makeTFtable<-function
### convert a tf nis list to a DF
(tfScores){
allTFs<-data.frame();
grnNames<-names(tfScores);
for(grnName in grnNames){
x<-tfScores[[grnName]];
genes<-rownames(x);
#rownames(x)<-'';
x2<-data.frame(reg=genes, grn=rep(grnName, length(genes)));
x2<-cbind(x2, x);
allTFs<-rbind(allTFs, x2);
}
allTFs
}