-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path200624_DEjaccard.R
217 lines (171 loc) · 8.17 KB
/
200624_DEjaccard.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
library(cmapR)
library(pbapply)
setwd("~/Dropbox/GDB/CMapCorr/")
# Jaccard similarity of two sets: size of the intersection divided by the
# size of the union.
#   A, B: vectors treated as sets (intersect()/union() ignore duplicates).
# Returns a single number between 0 and 1; NaN (0 / 0) when both are empty.
jaccard_similarity_fx <- function(A,B) {
  n_shared <- length(intersect(A, B))
  n_either <- length(union(A, B))
  n_shared / n_either
}
# lvl4 ----
# Pairwise Jaccard similarity between the per-sample gene sets of lvl4_data,
# computed within each cell_id, each pert_iname, each pert_iname x cell_id, and
# each pert_iname x cell_id x pert_dose x pert_time condition. The results are
# saved to disk and the lvl4 objects are then removed from the workspace.

# Reuse lvl4_data if it already exists; otherwise load the cached inputs, or
# run the input script when no cache file is found (both are expected to
# define lvl4_data).
if (!exists("lvl4_data")) {
  if (file.exists("~/Dropbox/GDB_archive/CMapCorr_files/lvl4_inputs.RData")) {
    load("~/Dropbox/GDB_archive/CMapCorr_files/lvl4_inputs.RData")
  } else {
    source("lvl4_inputs.R")
  }
}

# Per-sample (column) set of row names whose value exceeds 1.645 (presumably a
# one-sided z-score cutoff -- confirm). lapply() keeps this a list named by
# sample ID; apply() would return a matrix whenever every column happened to
# yield the same number of genes, breaking the name-based lookups below.
lvl4_de <- lapply(
  setNames(seq_len(ncol(lvl4_data@mat)), colnames(lvl4_data@mat)),
  function(J) rownames(lvl4_data@mat)[lvl4_data@mat[, J] > 1.645]
)

# Within each cell type: Jaccard similarity for every pair of samples.
# Groups with a single sample have no pairs (combn() errors on them), so they
# are skipped, matching the per-condition blocks further down.
temp_ids_ct <- sapply(unique(lvl4_data@cdesc$cell_id), function(CT) {
  rownames(lvl4_data@cdesc)[lvl4_data@cdesc$cell_id == CT]
}, simplify = FALSE)
lvl4_jacc_ct <- pbsapply(temp_ids_ct[lengths(temp_ids_ct) > 1], function(X) {
  temp <- combn(X, 2)
  mapply(jaccard_similarity_fx, A = lvl4_de[temp[1, ]], B = lvl4_de[temp[2, ]])
}, simplify = FALSE)

# Within each pert_iname: same pairwise comparison, same single-sample guard.
temp_ids_lig <- sapply(unique(lvl4_data@cdesc$pert_iname), function(LIG) {
  rownames(lvl4_data@cdesc)[lvl4_data@cdesc$pert_iname == LIG]
}, simplify = FALSE)
lvl4_jacc_lig <- pbsapply(temp_ids_lig[lengths(temp_ids_lig) > 1], function(X) {
  temp <- combn(X, 2)
  mapply(jaccard_similarity_fx, A = lvl4_de[temp[1, ]], B = lvl4_de[temp[2, ]])
}, simplify = FALSE)

# Within each pert_iname x cell_id combination.
lvl4_jacc_lig_ct <- list()
for (LIG in unique(lvl4_data@cdesc$pert_iname)) {
  temp_cond <- unique(lvl4_data@cdesc[lvl4_data@cdesc$pert_iname == LIG, "cell_id"])
  temp_cond_id <- sapply(temp_cond, function(CT) {
    rownames(lvl4_data@cdesc)[
      lvl4_data@cdesc$pert_iname == LIG &
        lvl4_data@cdesc$cell_id == CT
    ]
  }, simplify = FALSE)
  lvl4_jacc_lig_ct[[LIG]] <- sapply(
    temp_cond_id[lengths(temp_cond_id) > 1], function(X) {
      temp <- combn(X, 2)
      mapply(jaccard_similarity_fx, A = lvl4_de[temp[1, ]], B = lvl4_de[temp[2, ]])
    }, simplify = FALSE)
}
# Drop entries with no replicated condition, then flatten each entry into one
# named numeric vector.
lvl4_jacc_lig_ct <- lvl4_jacc_lig_ct[lengths(lvl4_jacc_lig_ct) > 0]
lvl4_jacc_lig_ct <- sapply(lvl4_jacc_lig_ct, unlist, simplify = FALSE)

# Within each pert_iname x cell_id x pert_dose x pert_time condition.
lvl4_jacc_lig_tx <- list()
for (LIG in unique(lvl4_data@cdesc$pert_iname)) {
  temp_cond <- unique(lvl4_data@cdesc[lvl4_data@cdesc$pert_iname == LIG,
                                      c("cell_id", "pert_dose", "pert_time")])
  # Build labels column-wise: apply() over data.frame rows goes through
  # as.matrix()/format(), which pads numeric dose/time values to a common
  # width (and number of decimals) and so yields inconsistent labels.
  temp_cond_str <- paste0(temp_cond[["cell_id"]], "_",
                          temp_cond[["pert_dose"]], "ng/mL_",
                          temp_cond[["pert_time"]], "hr")
  temp_cond_id <- sapply(seq_len(nrow(temp_cond)), function(X) {
    rownames(lvl4_data@cdesc)[
      lvl4_data@cdesc$pert_iname == LIG &
        lvl4_data@cdesc$cell_id == temp_cond[X, 1] &
        lvl4_data@cdesc$pert_dose == temp_cond[X, 2] &
        lvl4_data@cdesc$pert_time == temp_cond[X, 3]
    ]
  }, simplify = FALSE)
  names(temp_cond_id) <- temp_cond_str
  lvl4_jacc_lig_tx[[LIG]] <- sapply(
    temp_cond_id[lengths(temp_cond_id) > 1], function(X) {
      temp <- combn(X, 2)
      mapply(jaccard_similarity_fx, A = lvl4_de[temp[1, ]], B = lvl4_de[temp[2, ]])
    }, simplify = FALSE)
}
lvl4_jacc_lig_tx <- lvl4_jacc_lig_tx[lengths(lvl4_jacc_lig_tx) > 0]
lvl4_jacc_lig_tx <- sapply(lvl4_jacc_lig_tx, unlist, simplify = FALSE)

# Save the gene sets and the lvl4 Jaccard results. The pattern is anchored so
# that lvl4new_jacc_* objects left in the workspace are not swept into this
# file.
save(list = c("lvl4_de", grep("^lvl4_jacc", ls(), value = TRUE)),
     file = "~/Dropbox/GDB/CMapCorr_files/200624_lvl4jacc.RData")
# Clean up. LIG/CT are only removed if they actually exist (CT is never
# created at top level by this section), which avoids an rm() warning.
rm(list = c(intersect(c("LIG", "CT"), ls()),
            grep("^temp", ls(), value = TRUE),
            grep("^lvl4_", ls(), value = TRUE)))
# lvl5 ----
# Pairwise Jaccard similarity between the per-sample gene sets of lvl5_data,
# computed within each cell_id, each pert_iname, each pert_iname x cell_id, and
# each pert_iname x cell_id x pert_dose x pert_time condition. The results are
# saved to disk and the lvl5 objects are then removed from the workspace.

# Reuse lvl5_data if it already exists; otherwise load the cached inputs, or
# run the input script when no cache file is found (both are expected to
# define lvl5_data).
if (!exists("lvl5_data")) {
  if (file.exists("~/Dropbox/GDB_archive/CMapCorr_files/lvl5_inputs.RData")) {
    load("~/Dropbox/GDB_archive/CMapCorr_files/lvl5_inputs.RData")
  } else {
    source("lvl5_inputs.R")
  }
}

# Per-sample (column) set of row names whose value exceeds 1.645 (presumably a
# one-sided z-score cutoff -- confirm). lapply() keeps this a list named by
# sample ID; apply() would return a matrix whenever every column happened to
# yield the same number of genes, breaking the name-based lookups below.
lvl5_de <- lapply(
  setNames(seq_len(ncol(lvl5_data@mat)), colnames(lvl5_data@mat)),
  function(J) rownames(lvl5_data@mat)[lvl5_data@mat[, J] > 1.645]
)

# Within each cell type: Jaccard similarity for every pair of samples.
# Groups with a single sample have no pairs (combn() errors on them), so they
# are skipped, matching the per-condition blocks further down.
temp_ids_ct <- sapply(unique(lvl5_data@cdesc$cell_id), function(CT) {
  rownames(lvl5_data@cdesc)[lvl5_data@cdesc$cell_id == CT]
}, simplify = FALSE)
lvl5_jacc_ct <- pbsapply(temp_ids_ct[lengths(temp_ids_ct) > 1], function(X) {
  temp <- combn(X, 2)
  mapply(jaccard_similarity_fx, A = lvl5_de[temp[1, ]], B = lvl5_de[temp[2, ]])
}, simplify = FALSE)

# Within each pert_iname: same pairwise comparison, same single-sample guard.
temp_ids_lig <- sapply(unique(lvl5_data@cdesc$pert_iname), function(LIG) {
  rownames(lvl5_data@cdesc)[lvl5_data@cdesc$pert_iname == LIG]
}, simplify = FALSE)
lvl5_jacc_lig <- pbsapply(temp_ids_lig[lengths(temp_ids_lig) > 1], function(X) {
  temp <- combn(X, 2)
  mapply(jaccard_similarity_fx, A = lvl5_de[temp[1, ]], B = lvl5_de[temp[2, ]])
}, simplify = FALSE)

# Within each pert_iname x cell_id combination.
lvl5_jacc_lig_ct <- list()
for (LIG in unique(lvl5_data@cdesc$pert_iname)) {
  temp_cond <- unique(lvl5_data@cdesc[lvl5_data@cdesc$pert_iname == LIG, "cell_id"])
  temp_cond_id <- sapply(temp_cond, function(CT) {
    rownames(lvl5_data@cdesc)[
      lvl5_data@cdesc$pert_iname == LIG &
        lvl5_data@cdesc$cell_id == CT
    ]
  }, simplify = FALSE)
  lvl5_jacc_lig_ct[[LIG]] <- sapply(
    temp_cond_id[lengths(temp_cond_id) > 1], function(X) {
      temp <- combn(X, 2)
      mapply(jaccard_similarity_fx, A = lvl5_de[temp[1, ]], B = lvl5_de[temp[2, ]])
    }, simplify = FALSE)
}
# Drop entries with no replicated condition, then flatten each entry into one
# named numeric vector.
lvl5_jacc_lig_ct <- lvl5_jacc_lig_ct[lengths(lvl5_jacc_lig_ct) > 0]
lvl5_jacc_lig_ct <- sapply(lvl5_jacc_lig_ct, unlist, simplify = FALSE)

# Within each pert_iname x cell_id x pert_dose x pert_time condition.
lvl5_jacc_lig_tx <- list()
for (LIG in unique(lvl5_data@cdesc$pert_iname)) {
  temp_cond <- unique(lvl5_data@cdesc[lvl5_data@cdesc$pert_iname == LIG,
                                      c("cell_id", "pert_dose", "pert_time")])
  # Build labels column-wise: apply() over data.frame rows goes through
  # as.matrix()/format(), which pads numeric dose/time values to a common
  # width (and number of decimals) and so yields inconsistent labels.
  temp_cond_str <- paste0(temp_cond[["cell_id"]], "_",
                          temp_cond[["pert_dose"]], "ng/mL_",
                          temp_cond[["pert_time"]], "hr")
  temp_cond_id <- sapply(seq_len(nrow(temp_cond)), function(X) {
    rownames(lvl5_data@cdesc)[
      lvl5_data@cdesc$pert_iname == LIG &
        lvl5_data@cdesc$cell_id == temp_cond[X, 1] &
        lvl5_data@cdesc$pert_dose == temp_cond[X, 2] &
        lvl5_data@cdesc$pert_time == temp_cond[X, 3]
    ]
  }, simplify = FALSE)
  names(temp_cond_id) <- temp_cond_str
  lvl5_jacc_lig_tx[[LIG]] <- sapply(
    temp_cond_id[lengths(temp_cond_id) > 1], function(X) {
      temp <- combn(X, 2)
      mapply(jaccard_similarity_fx, A = lvl5_de[temp[1, ]], B = lvl5_de[temp[2, ]])
    }, simplify = FALSE)
}
lvl5_jacc_lig_tx <- lvl5_jacc_lig_tx[lengths(lvl5_jacc_lig_tx) > 0]
lvl5_jacc_lig_tx <- sapply(lvl5_jacc_lig_tx, unlist, simplify = FALSE)

# Save the gene sets plus every workspace object whose name contains both
# "lvl5" and "jacc".
save(list = c("lvl5_de", ls()[grepl("lvl5", ls()) & grepl("jacc", ls())]),
     file = "~/Dropbox/GDB/CMapCorr_files/200624_lvl5jacc.RData")
# Clean up. LIG/CT are only removed if they actually exist (CT is never
# created at top level by this section), which avoids an rm() warning.
rm(list = c(intersect(c("LIG", "CT"), ls()),
            grep("^temp", ls(), value = TRUE),
            grep("^lvl5_", ls(), value = TRUE)))
# lvl4new ----
# Pairwise Jaccard similarity between the per-sample gene sets of
# lvl4new_data, computed within each cell_id, each pert_iname, each
# pert_iname x cell_id, and each pert_iname x cell_id x pert_dose x pert_time
# condition. The results are saved to disk and the lvl4new objects are then
# removed from the workspace.

# Reuse lvl4new_data if it already exists; otherwise load the cached data, or
# run the generating script when no cache file is found (both are expected to
# define lvl4new_data).
if (!exists("lvl4new_data")) {
  if (file.exists("~/Dropbox/GDB_archive/CMapCorr_files/lvl4new.RData")) {
    load("~/Dropbox/GDB_archive/CMapCorr_files/lvl4new.RData")
  } else {
    source("200706_ZscoreFromAssayed.R")
  }
}

# Per-sample (column) set of row names whose value exceeds 1.645 (presumably a
# one-sided z-score cutoff -- confirm). lapply() keeps this a list named by
# sample ID; apply() would return a matrix whenever every column happened to
# yield the same number of genes, breaking the name-based lookups below.
lvl4new_de <- lapply(
  setNames(seq_len(ncol(lvl4new_data@mat)), colnames(lvl4new_data@mat)),
  function(J) rownames(lvl4new_data@mat)[lvl4new_data@mat[, J] > 1.645]
)

# Within each cell type: Jaccard similarity for every pair of samples.
# Groups with a single sample have no pairs (combn() errors on them), so they
# are skipped, matching the per-condition blocks further down.
temp_ids_ct <- sapply(unique(lvl4new_data@cdesc$cell_id), function(CT) {
  rownames(lvl4new_data@cdesc)[lvl4new_data@cdesc$cell_id == CT]
}, simplify = FALSE)
lvl4new_jacc_ct <- pbsapply(temp_ids_ct[lengths(temp_ids_ct) > 1], function(X) {
  temp <- combn(X, 2)
  mapply(jaccard_similarity_fx, A = lvl4new_de[temp[1, ]], B = lvl4new_de[temp[2, ]])
}, simplify = FALSE)

# Within each pert_iname: same pairwise comparison, same single-sample guard.
temp_ids_lig <- sapply(unique(lvl4new_data@cdesc$pert_iname), function(LIG) {
  rownames(lvl4new_data@cdesc)[lvl4new_data@cdesc$pert_iname == LIG]
}, simplify = FALSE)
lvl4new_jacc_lig <- pbsapply(temp_ids_lig[lengths(temp_ids_lig) > 1], function(X) {
  temp <- combn(X, 2)
  mapply(jaccard_similarity_fx, A = lvl4new_de[temp[1, ]], B = lvl4new_de[temp[2, ]])
}, simplify = FALSE)

# Within each pert_iname x cell_id combination.
lvl4new_jacc_lig_ct <- list()
for (LIG in unique(lvl4new_data@cdesc$pert_iname)) {
  temp_cond <- unique(lvl4new_data@cdesc[lvl4new_data@cdesc$pert_iname == LIG, "cell_id"])
  temp_cond_id <- sapply(temp_cond, function(CT) {
    rownames(lvl4new_data@cdesc)[
      lvl4new_data@cdesc$pert_iname == LIG &
        lvl4new_data@cdesc$cell_id == CT
    ]
  }, simplify = FALSE)
  lvl4new_jacc_lig_ct[[LIG]] <- sapply(
    temp_cond_id[lengths(temp_cond_id) > 1], function(X) {
      temp <- combn(X, 2)
      mapply(jaccard_similarity_fx, A = lvl4new_de[temp[1, ]], B = lvl4new_de[temp[2, ]])
    }, simplify = FALSE)
}
# Drop entries with no replicated condition, then flatten each entry into one
# named numeric vector.
lvl4new_jacc_lig_ct <- lvl4new_jacc_lig_ct[lengths(lvl4new_jacc_lig_ct) > 0]
lvl4new_jacc_lig_ct <- sapply(lvl4new_jacc_lig_ct, unlist, simplify = FALSE)

# Within each pert_iname x cell_id x pert_dose x pert_time condition.
lvl4new_jacc_lig_tx <- list()
for (LIG in unique(lvl4new_data@cdesc$pert_iname)) {
  temp_cond <- unique(lvl4new_data@cdesc[lvl4new_data@cdesc$pert_iname == LIG,
                                         c("cell_id", "pert_dose", "pert_time")])
  # Build labels column-wise: apply() over data.frame rows goes through
  # as.matrix()/format(), which pads numeric dose/time values to a common
  # width (and number of decimals) and so yields inconsistent labels.
  temp_cond_str <- paste0(temp_cond[["cell_id"]], "_",
                          temp_cond[["pert_dose"]], "ng/mL_",
                          temp_cond[["pert_time"]], "hr")
  temp_cond_id <- sapply(seq_len(nrow(temp_cond)), function(X) {
    rownames(lvl4new_data@cdesc)[
      lvl4new_data@cdesc$pert_iname == LIG &
        lvl4new_data@cdesc$cell_id == temp_cond[X, 1] &
        lvl4new_data@cdesc$pert_dose == temp_cond[X, 2] &
        lvl4new_data@cdesc$pert_time == temp_cond[X, 3]
    ]
  }, simplify = FALSE)
  names(temp_cond_id) <- temp_cond_str
  lvl4new_jacc_lig_tx[[LIG]] <- sapply(
    temp_cond_id[lengths(temp_cond_id) > 1], function(X) {
      temp <- combn(X, 2)
      mapply(jaccard_similarity_fx, A = lvl4new_de[temp[1, ]], B = lvl4new_de[temp[2, ]])
    }, simplify = FALSE)
}
lvl4new_jacc_lig_tx <- lvl4new_jacc_lig_tx[lengths(lvl4new_jacc_lig_tx) > 0]
lvl4new_jacc_lig_tx <- sapply(lvl4new_jacc_lig_tx, unlist, simplify = FALSE)

# Save the gene sets plus every workspace object whose name contains both
# "lvl4new" and "jacc".
save(list = c("lvl4new_de", ls()[grepl("lvl4new", ls()) & grepl("jacc", ls())]),
     file = "~/Dropbox/GDB/CMapCorr_files/200706_lvl4newjacc.RData")
# Clean up. LIG is only removed if the loops above actually created it, which
# avoids an rm() warning when there was nothing to iterate over.
rm(list = c(intersect("LIG", ls()),
            grep("^temp", ls(), value = TRUE),
            grep("^lvl4new_", ls(), value = TRUE)))