-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgraph_growth.py
146 lines (122 loc) · 4.91 KB
/
graph_growth.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
Describe the growth of a network over time.
Ex: Examine correlation between a node's degree and its chance of gaining an edge.
"""
import argparse
import collections
from collections.abc import Mapping
from pathlib import Path
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import graph_analyze
import graph_tools
import utils
Node = str
def count_degree_changes(old_graph: nx.Graph, new_graph: nx.Graph
                         ) -> tuple[collections.Counter[int], collections.Counter[int]]:
    """Tally degree growth between two snapshots of the same network.

    Returns a pair of counters keyed by degree:
      * degrees_all: for each degree d, how many common nodes had degree d
        in the old snapshot, and
      * degrees_attached: for each degree d, how many edge-additions
        happened to a node while it was at degree d.
    """
    nodes_common = set(old_graph.nodes).intersection(new_graph.nodes)
    utils.log(f"  Nodes in common: {len(nodes_common):_}")
    degrees_all: collections.Counter[int] = collections.Counter()
    degrees_attached: collections.Counter[int] = collections.Counter()
    for node in nodes_common:
        # Nodes can be "renamed" between snapshots, so edges that look new
        # might not really be new. Instead of walking added edges, we track
        # nodes whose degree increased (i.e. gained meaningful edges).
        before = old_graph.degree[node]
        after = new_graph.degree[node]
        degrees_all[before] += 1
        # Credit one attachment per intermediate degree: a node going from
        # degree 2 to degree 5 counts as +1 at degree 2, +1 at degree 3 and
        # +1 at degree 4, because that's what really happened — we just don't
        # have the resolution to know the order the edges arrived in.
        # (range() is empty when the degree did not grow.)
        for deg in range(before, after):
            degrees_attached[deg] += 1
    return degrees_all, degrees_attached
def load_degrees(graphs: list[Path]) -> tuple[dict[str, collections.Counter[int]],
                                              dict[str, collections.Counter[int]]]:
    """Measure degree growth between each pair of consecutive graph snapshots.

    Args:
      graphs: Paths to graph files ordered oldest to newest. Each snapshot is
        identified by its parent directory name.

    Returns:
      (degrees, attachments): dicts keyed by snapshot id. For each transition
      old -> new, `degrees[old_id]` is the degree distribution of the common
      nodes and `attachments[old_id]` counts edge-additions per degree
      (see count_degree_changes). The newest snapshot has no outgoing
      transition, so its id never appears as a key.
    """
    # NOTE: the original annotation said `nx.Graph`, but the body indexes
    # `.parent.name` and passes elements to graph_tools.load_graph — these
    # are filesystem paths, not graphs.
    utils.log(f"Running comparison over {len(graphs)} timesteps")
    old_id = graphs[0].parent.name
    old_graph = graph_tools.load_graph(graphs[0])
    utils.log(f"Loaded {graphs[0]}: # Nodes: {old_graph.number_of_nodes():_} # Edges: {old_graph.number_of_edges():_}")
    degrees = {}
    attachments = {}
    for new in graphs[1:]:
        new_id = new.parent.name
        new_graph = graph_tools.load_graph(new)
        utils.log(f"Loaded {new}: # Nodes: {new_graph.number_of_nodes():_} # Edges: {new_graph.number_of_edges():_}")
        degrees[old_id], attachments[old_id] = count_degree_changes(old_graph, new_graph)
        utils.log(f"  Degree added: {attachments[old_id].total():_}")
        # Slide the window forward one snapshot.
        old_id = new_id
        old_graph = new_graph
    return degrees, attachments
def degree_biases(degree_dist: Mapping[int, int], attach_degrees: Mapping[int, int],
                  max_degree: int = 15) -> tuple[dict[int, float], dict[int, float]]:
    """Return attachment biases vs uniform and vs preferential attachment.

    Args:
      degree_dist: degree -> number of nodes with that degree.
      attach_degrees: degree -> number of edge-additions at that degree.
      max_degree: highest degree to report (degrees 1..max_degree).

    Returns:
      (bias_unif, bias_pref): degree -> ratio of observed attachment rate to
      the rate expected under uniform random attachment and under
      preferential attachment, respectively. Degrees with no nodes are
      omitted; both dicts are empty if any total is zero.
    """
    total_deg = sum(degree_dist.values())
    total_deg_pref = sum(d * count for (d, count) in degree_dist.items())
    total_attach = sum(attach_degrees.values())
    bias_unif: dict[int, float] = {}
    bias_pref: dict[int, float] = {}
    # Guard the denominators: empty/zero inputs previously raised
    # ZeroDivisionError.
    if not (total_deg and total_deg_pref and total_attach):
        return bias_unif, bias_pref
    for n in range(1, max_degree + 1):
        # Fraction of nodes with this degree. Use .get() so a plain dict
        # (not just a Counter) works for degrees that never occur.
        deg_frac = degree_dist.get(n, 0) / total_deg
        if deg_frac > 0.0:
            # Probability of picking a node of this degree under
            # preferential attachment (proportional to degree).
            deg_pref = (n * degree_dist.get(n, 0)) / total_deg_pref
            # Fraction of attachments that went to nodes of this degree.
            attach_frac = attach_degrees.get(n, 0) / total_attach
            # Bias vs. uniform random attachment.
            bias_unif[n] = attach_frac / deg_frac
            # Bias vs. preferential attachment.
            bias_pref[n] = attach_frac / deg_pref
    return bias_unif, bias_pref
def plot(df, ax, ylabel, ymax):
    """Draw bias curves onto a pair of matplotlib axes.

    ax[0] gets every row of `df` as its own curve; ax[1] gets the
    column-wise mean with a +/- one-stddev band.
    """
    shared_params = dict(
        xlabel="Degree",
        xlim=(0, 15),
        xticks=range(16),
        ylabel=ylabel,
        ylim=(0.0, ymax),
        grid=True,
        legend=False,
    )
    # One curve per timestep.
    df.transpose().plot(ax=ax[0], **shared_params)
    # Aggregate view: mean curve plus a shaded stddev envelope.
    avg = df.mean()
    spread = df.std()
    avg.plot(ax=ax[1], **shared_params)
    ax[1].fill_between(avg.index, avg + spread, avg - spread,
                       color="b", alpha=0.1)
def main():
    """Entry point: compute (or reload) growth stats and plot the biases."""
    parser = argparse.ArgumentParser()
    parser.add_argument("graphs", type=Path, nargs="*")
    parser.add_argument("--data", type=Path, default="growth_degrees.pickle")
    args = parser.parse_args()

    if args.graphs:
        # Expensive path: recompute from the graph snapshots and cache.
        degrees, attachments = load_degrees(args.graphs)
        with open(args.data, "wb") as f:
            pickle.dump((degrees, attachments), f)
    else:
        # Fast path: reuse a previously cached computation.
        with open(args.data, "rb") as f:
            degrees, attachments = pickle.load(f)

    # Compute biases per timestep.
    bias_pairs = [degree_biases(degrees[version], attachments[version])
                  for version in attachments]
    biases_unif = [unif for unif, _ in bias_pairs]
    biases_pref = [pref for _, pref in bias_pairs]

    fig, axes = plt.subplots(2, 2)
    plot(pd.DataFrame(biases_unif), axes[0], "Bias vs. Uniform", 4.0)
    plot(pd.DataFrame(biases_pref), axes[1], "Bias vs. Preferential", 1.4)
    plt.show()
# Run as a script (skipped when imported as a module).
if __name__ == "__main__":
    main()