-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathrefineString.R
56 lines (49 loc) · 1.64 KB
/
refineString.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# refineString.R
#
# Purpose: Further refine the STRING subnetwork to include all
# and only genes for which we also have expression data.
# Version: v0.1
# Date: 2016-04-06
# Author: Ryoga
#
# Input: STRING network and FANTOM5 expression data
# Output: STRING subnetwork that includes all
# and only genes for which we also have expression data.
#
# ToDo (Steps):
# 3. Normalize imported datasets
#
# DONE:
# 1. import STRING network data
# 2. import FANTOM5 expression data
#
# V 0.1: Import data sets
#
# V 1.0: Select overlapped genes (really slow)
#
# V 1.1: Improved speed by using %in%
#
# Questions:
# 1. Does this solution actually select all the genes we want in the STRING net?
# ====================================================================
# setwd(DEVDIR)
# set working dir
# source("./fantom_import/fantom_main.R")
# ================= PARAMETERS AND INPUT FILES ====================
# Destination file for the refined STRING subnetwork saved at the end.
refinedSTRING <- file.path("./WEAVE", "refinedSTRINGnet.Rdata")

# load STRING dataset file
# load() restores the saved objects into the workspace and returns only their
# names, so STRINGdata is a character vector of object names, not the network.
# The filtering below reads the network from an object called `src`, which is
# expected to be one of the objects restored here.
STRINGdata <- load("./WEAVE/curatedOutput.RData")
if (!exists("src") || is.function(src)) {
  stop("No data object named 'src' is available after loading ",
       "'./WEAVE/curatedOutput.RData' (objects loaded: ",
       paste(STRINGdata, collapse = ", "), ")",
       call. = FALSE)
}
# A missing column would make `%in%` return an empty mask and the script would
# silently save an empty network, so fail loudly instead.
if (is.null(src$hgnc_1)) {
  stop("'src' has no 'hgnc_1' column to filter on", call. = FALSE)
}

# load FANTOM5 dataset file
FANTOM5 <- read.csv("./Normalize_TF/FANTOM5_TFList.csv",
                    header = TRUE, fill = TRUE)
# Discard columns 3 to 5; drop = FALSE keeps a data frame even if only a
# single column is left, so the `$Symbol` lookup below stays valid.
FANTOM5 <- FANTOM5[, -(3:5), drop = FALSE]

# load TF list data
TFList <- read.table("./Normalize_TF/Transcription Factor List.txt")

# ================== Construct the network =========================
fantomGene <- FANTOM5$Symbol
if (is.null(fantomGene)) {
  stop("'./Normalize_TF/FANTOM5_TFList.csv' has no 'Symbol' column",
       call. = FALSE)
}
tf <- TFList$x
if (is.null(tf)) {
  # STRINGtf is not saved, so this is only a warning.
  warning("Transcription factor list has no 'x' column; STRINGtf will be empty",
          call. = FALSE)
}

# Time the two subsetting steps.
ptm <- proc.time()
# Keep the rows of src whose hgnc_1 value is among the FANTOM5 symbols
# (the original author observed 1192824 rows).
# NOTE(review): only hgnc_1 is tested. If src carries a second gene column per
# row, rows whose other gene lacks expression data are still kept -- confirm
# this matches the "all and only" goal stated in the file header.
STRINGnew <- src[src$hgnc_1 %in% fantomGene, ]
# Same filter against the transcription factor list
# (the original author observed 1145696 rows).
STRINGtf <- src[src$hgnc_1 %in% tf, ]
proc.time() - ptm  # ~3 secs according to the original author

# Only the FANTOM5-based subnetwork is written to disk.
save(STRINGnew, file = refinedSTRING)
#save(STRINGtf, file = refinedSTRING)