-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenerate_synthetic_data.py
84 lines (60 loc) · 2.59 KB
/
generate_synthetic_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
Author: Shadi Zabad
Date: April 2020
"""
import networkx as nx
import numpy as np
import random
import os
from utils import make_dirs
def complete_disconnected_graph(g):
    """
    Join all connected components of `g` by adding random bridging edges.

    Components are taken in the order reported by `nx.connected_components`.
    Every component after the first is attached to the portion of the graph
    that has already been joined, using a single edge whose endpoints are
    drawn uniformly at random from each side. A graph with zero or one
    component is left unchanged.

    :param g: networkx graph to connect; it is modified in place.
    :return: None
    """
    # Materialise the components before touching the graph: the generator
    # must not be consumed while edges are being added.
    components = [list(comp) for comp in nx.connected_components(g)]
    if len(components) < 2:
        return

    # `random.sample` no longer accepts sets (TypeError on Python >= 3.11),
    # so endpoints are drawn with `random.choice` from lists instead.
    joined = components[0]
    for comp in components[1:]:
        g.add_edge(random.choice(joined), random.choice(comp))
        # `comp` now belongs to the joined part, so later components may
        # attach to any of its nodes as well.
        joined.extend(comp)
def generate_synthetic_graphs(n_graphs=2,
                              n_nodes=100,
                              main_graph="ER",
                              edge_removal_prob=0.1,
                              node_removal_prob=0.1):
    """
    Generate a random base graph together with perturbed copies of it.

    The base graph is drawn from the requested random-graph model and then
    made connected. Each additional graph is derived from the base graph by
    dropping edges and nodes at random and reconnecting whatever remains.

    :param n_graphs: Total number of graphs to return, base graph included.
        The base graph is always returned, so values below 1 behave like 1.
    :param n_nodes: Number of nodes of the base graph.
    :param main_graph: Model of the base graph: "WS" (Watts-Strogatz),
        "ER" (Erdos-Renyi) or "PA" (Barabasi-Albert).
    :param edge_removal_prob: Probability with which each edge of the base
        graph is left out of a perturbed copy.
    :param node_removal_prob: Probability with which each node of the
        edge-thinned copy is removed afterwards.
    :return: List of networkx graphs; the first entry is the base graph.
    :raises NotImplementedError: If `main_graph` is not a supported model.
    """
    if main_graph == "WS":
        g0 = nx.watts_strogatz_graph(n=n_nodes, k=5, p=0.85)
    elif main_graph == "ER":
        g0 = nx.erdos_renyi_graph(n=n_nodes, p=0.15)
    elif main_graph == "PA":
        # n_nodes // 20 is 0 for fewer than 20 nodes, which the generator
        # rejects; always attach at least one edge per new node.
        g0 = nx.barabasi_albert_graph(n=n_nodes, m=max(1, n_nodes // 20))
    else:
        raise NotImplementedError(
            "Unsupported main_graph {!r}; expected 'WS', 'ER' or 'PA'.".format(main_graph)
        )

    # Complete graph (in case of isolates or disconnected components):
    complete_disconnected_graph(g0)
    graphs = [g0]

    # The base graph is never modified below, so list its edges only once.
    base_edges = list(g0.edges())

    for _ in range(n_graphs - 1):
        drop_edge = np.random.binomial(1, edge_removal_prob,
                                       size=len(base_edges)).astype(bool)

        # The copy is rebuilt from the surviving edges alone, so a node that
        # loses all of its edges is not carried over. Filtering the Python
        # tuples directly keeps node labels as plain objects instead of
        # NumPy scalars.
        ng = nx.Graph()
        ng.add_edges_from(edge for edge, dropped in zip(base_edges, drop_edge)
                          if not dropped)

        nodes = list(ng.nodes())
        drop_node = np.random.binomial(1, node_removal_prob,
                                       size=len(nodes)).astype(bool)
        ng.remove_nodes_from([node for node, dropped in zip(nodes, drop_node)
                              if dropped])

        # Complete graph (in case of isolates or disconnected components):
        complete_disconnected_graph(ng)
        graphs.append(ng)

    return graphs
if __name__ == '__main__':
    # Sweep every combination of graph model, graph size and removal
    # probabilities, writing six graphs per combination as edge lists.
    main_dir = "./data/synthetic/"

    graph_types = ['ER', 'PA', 'WS']
    n_nodes = [50, 100, 1000]
    edge_removal_prob = [0.05, 0.1, 0.2, 0.3]
    node_removal_prob = [0.05, 0.1, 0.2, 0.3]

    # Flattened parameter grid, in the same order as four nested loops
    # (graph type outermost, node removal probability innermost).
    settings = [(kind, size, edge_p, node_p)
                for kind in graph_types
                for size in n_nodes
                for edge_p in edge_removal_prob
                for node_p in node_removal_prob]

    for kind, size, edge_p, node_p in settings:
        # One directory per parameter combination.
        path_parts = [
            main_dir,
            kind,
            'n_' + str(size),
            'erp_' + str(edge_p),
            'nrp_' + str(node_p),
            'edgelist_data',
        ]
        output_dir = os.path.join(*path_parts)
        make_dirs(output_dir)

        generated = generate_synthetic_graphs(n_graphs=6,
                                              n_nodes=size,
                                              main_graph=kind,
                                              edge_removal_prob=edge_p,
                                              node_removal_prob=node_p)

        # Files are named g0.edgelist, g1.edgelist, ... in generation order.
        for idx, graph in enumerate(generated):
            target = os.path.join(output_dir, 'g' + str(idx) + '.edgelist')
            nx.write_edgelist(graph, target)