-
Notifications
You must be signed in to change notification settings - Fork 1
/
final.project.MCL.MCODE.R
175 lines (145 loc) · 6.48 KB
/
final.project.MCL.MCODE.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# Created by Suthinan Rujirapipat, 7 December 2015
# As part of master project @ University of Sunderland, Information Technology Management
# Project root: every relative path used below (".\\library", ".\\data",
# ".\\output") is resolved against this directory.
setwd("E:\\Sunderland\\PROM01 - Final Dissertation\\Development")

# All packages are loaded from the project-local ".\\library" folder.
# `lib.loc` is written out in full; the previous `lib =` only worked through
# partial argument matching.
library(igraph, lib.loc = ".\\library")
library(Rcpp, lib.loc = ".\\library")
library(expm, lib.loc = ".\\library")
library(MCL, lib.loc = ".\\library")
library(dynamicTreeCut, lib.loc = ".\\library")
library(RColorBrewer, lib.loc = ".\\library")
library(linkcomm, lib.loc = ".\\library")
library(ProNet, lib.loc = ".\\library")
# Interaction list for APP, exported from the STRING API (PSI-MI TAB):
# http://string-db.org/api/psi-mi-tab/interactionsList?identifiers=APP&limit=10000&required_score=900
APP <- read.table(file = ".\\data\\APPPSIMI.csv", header = FALSE)

# Keep only the two columns used as edge endpoints
# (presumably the interactor names -- confirm against the file).
APP <- APP[c("V3", "V4")]

# Upper-case both endpoint columns, then drop repeated rows.
APP[] <- lapply(APP, toupper)
APP <- unique(APP)

# Two undirected graphs built from the same edge list, one per algorithm.
g <- graph.data.frame(d = APP, directed = FALSE) # For MCODE
g2 <- graph.data.frame(d = APP, directed = FALSE) # For MCL

# Everything written from here on (PNG files) goes to the output folder.
setwd(".\\output")
#======================= MCODE generation and visualisation =======================
# MCODE network clustering
# graph <- An igraph object.
# vwp <- Vertex weight percentage. Default value is 0.5.
# haircut <- Boolean value, whether to remove singly-connected nodes from clusters (TRUE) or not (FALSE).
# fluff <- Boolean value, whether to expand cluster cores by one neighbour shell outwards (TRUE) or not (FALSE).
# fdt <- Cluster density cutoff. Default value is 0.8.
# loops <- Boolean value, whether to include self-loops (TRUE) or not (FALSE).
mcg <- mcode(g, vwp = 0.0, loops = FALSE, haircut = TRUE, fluff = FALSE, fdt = 0.2)

# Create and visualise of induced subgraphs (clusters), just in case
#
# cluster1 <- induced.subgraph(g, mcg$COMPLEX[[1]])
# writeClipboard(V(cluster1)$name)
#
# visualization(cluster1,
# node.size=4,
# node.label=V(cluster1)$name,
# node.label.color="blue",
# edge.color="gray",
# edge.width=0.1,
# node.fill.color = "red")
#
summary(mcg$COMPLEX)

# Positions in mcg$COMPLEX whose score is not NA.
index <- which(!is.na(mcg$score))

# One entry per vertex of g: 0 means "in no complex", otherwise the position
# (within `index`) of the complex the vertex was assigned to. Complexes are
# written in order, so a vertex that appears in several complexes keeps the
# id of the last one.
membership <- rep(0, vcount(g))
# seq_along() keeps the loop from running when `index` is empty
# (1:length(index) would iterate over c(1, 0)).
for (i in seq_along(index)) {
  membership[mcg$COMPLEX[[index[i]]]] <- i
}
# names(membership) <- V(g)$name
# Sizes of the (up to) five largest MCODE clusters. Vertices with membership 0
# (in no complex) are excluded. A vertex that belongs to several complexes is
# counted once, under the last complex written to `membership`.
# head() is used instead of [1:5] so fewer than five clusters does not produce
# NA entries.
head(sort(table(unlist(membership[membership != 0])), decreasing = TRUE), 5)

# Transitivity of the subgraph induced by the 14th entry of mcg$COMPLEX.
# Note that 14 indexes mcg$COMPLEX directly, not the ids stored in `membership`.
transitivity(induced.subgraph(g, mcg$COMPLEX[[14]]), isolates = "zero")

names(membership) <- V(g)$name
# lc$Cluster <- lc$Cluster

# Copy the member names of MCODE cluster 1 to the clipboard and report how
# many there are. The count previously read `lc`, which is the MCL result and
# does not exist yet at this point of the script.
writeClipboard(names(membership)[membership == 1])
length(names(membership)[membership == 1])
# Fill colours for the MCODE plot. Names are the cluster ids stored in
# `membership`; values are the colours used for those clusters. Vertices in
# any other cluster, or in none (membership 0), stay white.
mcode_palette <- c(
  "1" = "red",
  "2" = "pink",
  "3" = "aquamarine",
  "4" = "yellow",
  "5" = "magenta"
)

# One colour per vertex. Starting from a full-length vector matters: assigning
# into a length-one "white" through a longer logical mask pads the vector with
# NA rather than "white".
color <- rep("white", vcount(g))
for (cluster_id in names(mcode_palette)) {
  color[membership == as.integer(cluster_id)] <- mcode_palette[[cluster_id]]
}

# Create network graph of the result
png(
  file = paste(format(Sys.time(), "%H-%M-%S"), "MCODE", "png", sep = "."),
  height = 1500, width = 1500, bg = "white"
)
visualization(
  graph = g,
  layout = "kamada.kawai",
  node.size = 4,
  node.label.color = "blue",
  edge.color = "gray",
  edge.width = 0.1,
  node.fill.color = color
)
# node.fill.color = list(heat.colors(5)))
title(main = "Molecular Complex Detection Algorithm (MCODE)", cex.main = 4)

# Legend built from the same palette used for the fill colours, so the labels
# cannot drift from what is drawn. Each entry shows the cluster id and the
# number of vertices carrying that id.
mcode_ids <- as.integer(names(mcode_palette))
mcode_sizes <- vapply(
  mcode_ids,
  function(id) sum(membership == id),
  integer(1)
)
legend(
  "bottomright",
  legend = paste0("Cluster ", mcode_ids, " (", mcode_sizes, " nodes)"),
  fill = unname(mcode_palette),
  title = "Cluster Size"
)
dev.off()
#======================= MCL generation and visualisation ==================================
# Convert igraph graph object to adjacency matrix
# adj[i, j] is 1 when vertex j is a neighbour of vertex i in g2, else 0
# (rows/columns follow the order of V(g2)). The work is done inside local()
# so the loop temporaries never reach the global environment and no rm()
# clean-up is needed; it also avoids masking igraph's neighbors() with a
# variable of the same name.
adj <- local({
  vertex_names <- V(g2)$name
  n_vertices <- length(vertex_names)
  m <- matrix(0, nrow = n_vertices, ncol = n_vertices)
  # seq_len() makes the loop a no-op for an empty graph.
  for (i in seq_len(n_vertices)) {
    nbrs <- neighbors(g2, v = vertex_names[i], mode = "all")
    # Column positions of the neighbours; unmatched names map to 0, which
    # sub-assignment ignores.
    cols <- match(nbrs$name, vertex_names, nomatch = 0)
    m[i, cols] <- 1
  }
  m
})
# Markov Cluster Algorithm
# x <- an adjacency or (n x n) matrix
# addLoops <- logical; if TRUE, self-loops with weight 1 are added to each vertex of x (see Details).
# expansion <- numeric value > 1 for the expansion parameter
# inflation <- numeric value > 0 for the inflation power coefficient
# allow1 <- logical; if TRUE, vertices are allowed to form their own cluster.
# If FALSE, clusters of size 1 are interpreted as background noise and grouped in one cluster.
# max.iter <- an integer, the maximum number of iterations for the MCL
# ESM <- logical whether the equilibrium state matrix should be returned (default is FALSE)
lc <- mcl(adj, addLoops = TRUE, inflation = 1.8, allow1 = TRUE, ESM = FALSE)

# Check MCL complexes' size
lc

# Sizes of the (up to) five largest clusters. head() is used instead of [1:5]
# so fewer than five clusters does not produce NA entries.
head(sort(table(unlist(lc$Cluster)), decreasing = TRUE), 5)

# Transitivity of the subgraph induced by the vertices labelled cluster 9.
# which() turns the logical mask into explicit vertex indices.
transitivity(induced.subgraph(g2, which(lc$Cluster == 9)), isolates = "zero")

# Attach vertex names to the result so clusters can be listed by name.
lc$name <- V(g2)$name
# lc$Cluster <- lc$Cluster

# Copy the member names of cluster 3 to the clipboard and report how many
# there are.
writeClipboard(lc$name[lc$Cluster == 3])
length(lc$name[lc$Cluster == 3])
# Assign color according to the top 5
# Names are the MCL cluster labels found in lc$Cluster; values are the fill
# colours used for those clusters. Vertices in any other cluster stay white.
mcl_palette <- c(
  "3" = "red",
  "1" = "pink",
  "5" = "aquamarine",
  "6" = "yellow",
  "7" = "magenta"
)

# One colour per vertex. Starting from a full-length vector matters: assigning
# into a length-one "white" through a longer logical mask pads the vector with
# NA rather than "white".
color2 <- rep("white", vcount(g2))
for (cluster_id in names(mcl_palette)) {
  color2[lc$Cluster == as.integer(cluster_id)] <- mcl_palette[[cluster_id]]
}

# Create network graph of the result
png(
  file = paste(format(Sys.time(), "%H-%M-%S"), "MCL", "png", sep = "."),
  height = 1500, width = 1500, bg = "white"
)
visualization(
  graph = g2,
  layout = "kamada.kawai",
  node.size = 4,
  node.label.color = "blue",
  edge.color = "gray",
  edge.width = 0.1,
  node.fill.color = color2
)
title(main = "Markov Clustering (MCL)", cex.main = 4)

# Legend built from the same palette used for the fill colours, so the labels
# cannot drift from what is drawn. Each entry shows the cluster label and the
# number of vertices carrying that label.
mcl_ids <- as.integer(names(mcl_palette))
mcl_sizes <- vapply(
  mcl_ids,
  function(id) sum(lc$Cluster == id),
  integer(1)
)
legend(
  "bottomright",
  legend = paste0("Cluster ", mcl_ids, " (", mcl_sizes, " nodes)"),
  fill = unname(mcl_palette),
  title = "Cluster Size"
)
dev.off()
# generateMCL (disabled helper): plots the induced subgraph of MCL cluster x, filled with colorx
# generateMCL <- function (x, colorx) {
#
# mclCluster <- induced.subgraph(g2, lc$Cluster == x)
#
# visualization(graph = mclCluster,
# layout="fruchterman.reingold",
# node.size=8,
# node.label=V(mclCluster)$name,
# node.label.color="blue",
# edge.color = "gray",
# edge.width=0.1,
# node.fill.color = colorx)
# }
#
# generateMCL(2,"yellow")