Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
tma1 authored Aug 9, 2024
1 parent 9b5392a commit 7341fb2
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 0 deletions.
7 changes: 7 additions & 0 deletions petagraph/code/preprocessing/4DN_LOOP.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Build DNL4, a three-column character matrix (subject, predicate, object)
# linking every 4DN loop to four HSCLO 1 kb bins.
#
# For i = 1..4 the table "~/<DS[i, 1]>.txt" is read (DS is created outside
# this script) and columns 1:6 and 12 are kept. Each table row becomes one loop
# node whose id is built from columns 1-6, and four edges: one per coordinate
# in columns 2, 3, 5 and 6, each pointing at the HSCLO bin that ends at that
# coordinate and starts 999 positions earlier.
# NOTE(review): columns 1-3 / 4-6 are presumably chrom/start/end of the two
# loop anchors - confirm against the input file layout.

# Render a coordinate as plain digits. paste() alone prints round doubles such
# as 100000 as "1e+05", which would corrupt the HSCLO node id.
format_coord <- function(x) sprintf("%.0f", as.numeric(x))

# HSCLO label for the window [pos - 999, pos] on chromosome `chrom`.
hsclo_bin_ending_at <- function(chrom, pos) {
  paste("HSCLO ", chrom, ".", format_coord(pos - 999), "-", format_coord(pos),
        sep = "")
}

loop_predicates <- c("loop_us_start", "loop_us_end",
                     "loop_ds_start", "loop_ds_end")

# The result starts with one all-blank placeholder row.
DNL4 <- array("", c(1, 3))
for (i in 1:4) {
  D <- read.delim(paste("~/", DS[i, 1], ".txt", sep = ""))
  D <- D[, c(1:6, 12)]
  n_loops <- dim(D)[1]

  # NOTE(review): every dataset also contributes its own leading blank row;
  # this is kept so the output matches what downstream steps already receive.
  U <- array("", c(1, 3))

  # An empty table contributes no edges (1:0 would iterate over 1 and 0).
  if (n_loops > 0) {
    # The loop id is built with plain paste() so it stays identical to the
    # subject ids produced by the companion q-value script.
    loop_id <- paste("4DNL ", DS[i, 1], ".", D[, 1], ".", D[, 2], "-", D[, 3],
                     ".", D[, 4], ".", D[, 5], "-", D[, 6], sep = "")

    # One row per predicate, one column per loop; reading the matrix
    # column-wise yields the four edges of loop 1, then of loop 2, and so on.
    anchor_bins <- rbind(
      hsclo_bin_ending_at(D[, 1], D[, 2]),
      hsclo_bin_ending_at(D[, 1], D[, 3]),
      hsclo_bin_ending_at(D[, 4], D[, 5]),
      hsclo_bin_ending_at(D[, 4], D[, 6])
    )

    # Kept under the name `u` so cbind() labels the first column "u", as the
    # row-by-row version did.
    u <- rep(loop_id, each = 4)
    U <- rbind(U, cbind(u,
                        rep(loop_predicates, times = n_loops),
                        as.vector(anchor_bins)))
  }
  DNL4 <- rbind(DNL4, U)
}
3 changes: 3 additions & 0 deletions petagraph/code/preprocessing/4DN_Q.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Build DNQ4, a three-column character matrix (subject, predicate, object)
# linking every 4DN loop to a q-value bin.
#
# For i = 1..4 the table "~/<DS[i, 1]>.txt" is read (DS is created outside
# this script) and columns 1:6 and 12 are kept. Column 12 (column 7 after
# trimming) is the value that gets binned: the bin label is
# "4DNQ 1e<floor(log10(q))>.1e<ceiling(log10(q))>".
# NOTE(review): a value that is an exact power of ten gets the same lower and
# upper bound, and a value of 0 yields "-Inf" bounds - confirm this is
# acceptable for the bin nodes defined elsewhere.

# The result starts with one all-blank placeholder row.
DNQ4 <- array("", c(1, 3))
for (i in 1:4) {
  D <- read.delim(paste("~/", DS[i, 1], ".txt", sep = ""))
  D <- D[, c(1:6, 12)]
  n_loops <- dim(D)[1]

  U <- array("", c(n_loops, 3))
  # An empty table contributes no rows; 1:0 would index row 1 of a 0-row
  # matrix and stop with a subscript error.
  if (n_loops > 0) {
    q_exponent <- log10(D[, 7])
    # Loop id: same construction as in the loop-anchor script so both edge
    # lists refer to the same subject nodes.
    U[, 1] <- paste("4DNL ", DS[i, 1], ".", D[, 1], ".", D[, 2], "-", D[, 3],
                    ".", D[, 4], ".", D[, 5], "-", D[, 6], sep = "")
    U[, 2] <- "loop_has_qvalue_bin"
    U[, 3] <- paste("4DNQ ", "1e", floor(q_exponent), ".",
                    "1e", ceiling(q_exponent), sep = "")
  }
  DNQ4 <- rbind(DNQ4, U)
}
23 changes: 23 additions & 0 deletions petagraph/code/preprocessing/CLINVAR.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Requires the latest variant_summary.txt from the ClinVar website at NCBI
library(stringr)
# Build ClinVar_Edgelist: a character matrix with columns subject, object,
# predicate, one row per (column-6 id, phenotype id) pair taken from the
# ClinVar variant summary.

# Remove the rows at positions `idx`. Unlike x[-idx, ], this keeps every row
# when `idx` is empty, and never collapses a one-row result into a vector.
drop_rows <- function(x, idx) {
  if (length(idx) > 0) {
    x <- x[-idx, , drop = FALSE]
  }
  x
}

VS <- read.delim("~/Path/variant_summary.txt", header=FALSE, comment.char="#")
# NOTE(review): columns 6, 7, 13 and 25 are presumably the HGNC id, clinical
# significance, phenotype ids and review status - confirm against the layout
# of the downloaded file.
VS <- unique(VS[, c(6, 7, 13, 25)])

# Keep rows whose column 2 mentions 'pathogenic', then drop the ones that also
# mention 'uncertain' or 'conflicting', and those without assertion criteria.
# NOTE(review): these patterns are case-sensitive; values that start with a
# capital letter (e.g. "Conflicting ...") are not matched by the lower-case
# patterns - confirm that this is intended.
VS <- VS[str_which(VS[, 2], 'pathogenic'), ]
VS <- drop_rows(VS, str_which(VS[, 2], 'uncertain'))
VS <- drop_rows(VS, str_which(VS[, 2], 'conflicting'))
VS <- drop_rows(VS, which(VS[, 4] == 'no assertion criteria provided'))

# Normalise the '|' and ';' separators to ',' and split into single ids.
VS[, 3] <- str_replace_all(VS[, 3], '\\|', ',')
VS[, 3] <- str_replace_all(VS[, 3], ';', ',')
V <- strsplit(VS[, 3], ',')

# Expand to one row per (id, phenotype) pair in a single step. Source rows are
# walked last-to-first, which reproduces the row order obtained by prepending
# each block to the result inside a loop.
row_order <- rev(seq_along(V))
VR <- matrix(
  c(rep(as.character(VS[row_order, 1]), times = lengths(V)[row_order]),
    as.character(unlist(V[row_order]))),
  ncol = 2
)
VR <- unique(VR)

# Discard empty tokens, tokens containing 'condition', and '-' placeholders on
# either side of the pair.
VR <- drop_rows(VR, which(VR[, 2] == ""))
VR <- drop_rows(VR, str_which(VR[, 2], 'condition'))
VR <- drop_rows(VR, which(VR[, 2] == '-'))
VR <- drop_rows(VR, which(VR[, 1] == '-'))

VR[, 2] <- str_remove_all(VR[, 2], 'Human Phenotype Ontology:')
VR[, 2] <- str_replace_all(VR[, 2], 'MONDO:MONDO:', 'MONDO:')

# NOTE(review): the predicate label is spelled "assoicated"; it is left as is
# because other pipeline steps may match on this exact string.
ClinVar_Edgelist <- cbind(
  VR,
  array('gene_assoicated_with_disease_or_phenotype', c(dim(VR)[1], 1))
)
colnames(ClinVar_Edgelist) <- c('subject', 'object', 'predicate')

# Clean up intermediates only after the edge list exists; removing VR earlier
# made the cbind() above fail with "object 'VR' not found".
rm(VS, V, VR, row_order, drop_rows)
12 changes: 12 additions & 0 deletions petagraph/code/preprocessing/CMAP.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

#CMAP edgelist data is required and can be obtained from https://maayanlab.cloud/Harmonizome/resource/Connectivity+Map
# Build CM, the CMAP edge list (subject, object, predicate), extended with an
# 'object-' column that holds each object id with its last '-'-separated
# token removed.
CM <- read.delim("~/CMAP.txt")
CM <- as.data.frame(cbind(CM$source, CM$target, CM$weight))
colnames(CM) <- c('subject', 'object', 'predicate')
# The first data row is discarded.
# NOTE(review): presumably a second header line in the download - confirm.
CM <- CM[-1, ]

# Lookup table: column 1 is the distinct object id, column 2 its shortened
# form (filled in below).
SM <- cbind(unique(CM[, 2]), unique(CM[, 2]))
SMS <- strsplit(SM[, 1], '-')

# Ids with more than two tokens keep all but the last token, re-joined with
# '-'; shorter ids keep only their first token.
strip_last_token <- function(tokens) {
  n_tokens <- length(tokens)
  if (n_tokens > 2) {
    paste(tokens[-n_tokens], collapse = '-')
  } else {
    tokens[1]
  }
}
# vapply() also copes with an empty list, where 1:length(SMS) would try to
# read SMS[[1]] and stop.
SM[, 2] <- vapply(SMS, strip_last_token, character(1))

colnames(SM) <- c('object', 'object-')
CM <- merge(CM, SM, by = 'object')
1 change: 1 addition & 0 deletions petagraph/code/preprocessing/HSCLO_GENCODE.R
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Label each row of G with its 1 kb HSCLO bin and store the label in column 3
# of L. G and L are created outside this script; column 2 of G is read as the
# position and column 6 as the chromosome part of the label.
# Bins are 1-based and closed: positions 1..1000 map to "1-1000", 1001..2000
# to "1001-2000", and a position that is an exact multiple of 1000 closes its
# bin.
# NOTE(review): the row count is hard-coded; confirm it still equals the
# number of rows in G (and L) for the annotation release in use.
for (i in 1:295559) {
  # Convert once so the comparison below is numeric even if G stores text;
  # comparing text with a number falls back to string ordering.
  pos <- as.numeric(G[i, 2])
  l <- floor(pos / 1000) * 1000
  if (pos > l) {
    bin_end <- l + 1000
  } else {
    bin_end <- l
  }
  # sprintf() keeps the bounds as plain digits; paste() alone prints round
  # doubles such as 100000 as "1e+05".
  L[i, 3] <- paste("HSCLO", " ", G[i, 6], ".",
                   sprintf("%.0f", bin_end - 999), "-",
                   sprintf("%.0f", bin_end), sep = "")
}
17 changes: 17 additions & 0 deletions petagraph/code/preprocessing/L1000.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#L1000 edgelist data is required and can be obtained from https://maayanlab.cloud/Harmonizome/resource/LINCS+L1000+Connectivity+Map
# Build L1000, an edge list (subject, predicate, object) linking PubChem
# compounds to genes, from the Harmonizome L1000 edge list and the SM.csv
# lookup table.
L1000 <- read.delim("~/L1000.txt")
L1000 <- as.data.frame(L1000)

# The part of each target before the first '_' is used as the key that is
# matched against SM.csv below.
Target <- strsplit(L1000$target, "_")
# vapply() also handles an empty table, where 1:length(Target) would try to
# read Target[[1]] and stop.
Drugs <- array(
  vapply(Target, function(parts) parts[1], character(1)),
  c(length(Target), 1)
)

L1000 <- cbind(L1000$source, Drugs, L1000$weight)
colnames(L1000) <- c("subject", "object", "predicate")

Small_Molecules <- read.csv("SM.csv")
colnames(Small_Molecules) <- c("object", "pubchem_cid")

# After the merge the columns are object, subject, predicate, pubchem_cid;
# reorder them to pubchem_cid, predicate, source and rename accordingly.
L1000 <- merge(L1000, Small_Molecules, by = "object")
L1000 <- L1000[, c(4, 3, 2)]
colnames(L1000) <- c("subject", "predicate", "object")

# Convert the weights once, before any label is written into the column;
# re-parsing the column after the first replacement would coerce the new text
# labels and raise "NAs introduced by coercion" warnings.
edge_weight <- as.numeric(L1000$predicate)
L1000[which(edge_weight == 1), 2] <- "positively_correlated_with_gene"
L1000[which(edge_weight == -1), 2] <- "negatively_correlated_with_gene"

L1000$subject <- paste("PUBCHEM", L1000$subject)
L1000 <- unique(L1000)
15 changes: 15 additions & 0 deletions petagraph/code/preprocessing/MSIGDB.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
library('msigdbr')
library('stringr')
# Build MSIGDB, an edge list (subject, predicate, object) from five MSigDB
# collections, each with its own fixed predicate.

# Fetch one collection, keep columns 3, 2 and 7 of the msigdbr result (in that
# order) and overwrite the middle column with `predicate`.
# NOTE(review): the column positions depend on the msigdbr version in use;
# presumably they are the gene-set name, sub-category and gene symbol -
# confirm.
msigdb_collection <- function(category, predicate) {
  gene_sets <- msigdbr(species = 'human', category = category)
  gene_sets <- as.data.frame(gene_sets[, c(3, 2, 7)])
  gene_sets[, 2] <- predicate
  gene_sets
}

C1 <- msigdb_collection('C1', 'chr_band_contains_gene')
C2 <- msigdb_collection('C2', 'pathway_associated_with_gene')
C3 <- msigdb_collection('C3', 'targets_expression_of_gene')
C8 <- msigdb_collection('C8', 'has_marker_gene')
H <- msigdb_collection('H', 'has_signature_gene')

# Drop the C2 rows whose first column contains 'KEGG'. The guard matters:
# C2[-integer(0), ] would select no rows at all when nothing matches.
kegg_rows <- str_which(C2[, 1], 'KEGG')
if (length(kegg_rows) > 0) {
  C2 <- C2[-kegg_rows, ]
}

MSIGDB <- unique(rbind(C1, C2, C3, C8, H))
colnames(MSIGDB) <- c('subject', 'predicate', 'object')

0 comments on commit 7341fb2

Please sign in to comment.