Skip to content

Commit

Permalink
Overhaul graph reading and writing to support Multi/Di/Weighted graphs naturally.
Browse files Browse the repository at this point in the history
  • Loading branch information
sligocki committed Jan 10, 2024
1 parent e4e3674 commit c3f56b2
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 40 deletions.
2 changes: 1 addition & 1 deletion circles_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def main():
graph = graph_tools.make_graph(nodes, edges)
utils.log(f"Created graph with {graph.number_of_nodes():_} nodes and {graph.number_of_edges():_} edges")

graph_file = Path("results", "circles", "graph", f"{args.focus_id}.{args.num_circles}.adj.nx")
graph_file = Path("results", "circles", "graph", f"{args.focus_id}.{args.num_circles}")
graph_file.parent.mkdir(parents=True, exist_ok=True)
graph_tools.write_graph(graph, graph_file)
utils.log(f"Wrote: {str(graph_file)}")
Expand Down
2 changes: 1 addition & 1 deletion dump_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ echo "(4) Building Graph"
# 2m
time python3 graph_make_family.py --version=${TIMESTAMP}
# 10m
time python3 graph_core.py ${VERSION_DIR}/graphs/family/all.graph.adj.nx
time python3 graph_core.py ${VERSION_DIR}/graphs/family/all.adj.nx

echo
echo "(5) Convert to SQLite DB"
Expand Down
17 changes: 8 additions & 9 deletions graph_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ def main():
utils.log(f"Found main component: {len(graph.nodes):_} Nodes / {len(graph.edges):_} Edges")
utils.log(degree_distr_str(graph))

filename = graph_dir / "main.graph.adj.nx"
graph_tools.write_graph(graph, filename)
basename = graph_dir / "main"
filename = graph_tools.write_graph(graph, basename)
utils.log(f"Saved main component to {str(filename)}")

# Need to copy, because graph is frozen here (due to being a subgraph).
Expand All @@ -178,21 +178,20 @@ def main():
utils.log(f"Shaved graph: {len(graph.nodes):_} Nodes / {len(graph.edges):_} Edges")
utils.log(degree_distr_str(graph))

filename = graph_dir / "shaved.graph.adj.nx"
graph_tools.write_graph(graph, filename)
basename = graph_dir / "shaved"
filename = graph_tools.write_graph(graph, basename)
utils.log(f"Saved shaved main to {str(filename)}")

# Map: core nodes -> nodes that collapse into this core node
graph, rep_nodes = FindCore(graph)
utils.log(f"Contracted graph: {len(graph.nodes):_} Nodes / {len(graph.edges):_} Edges / {num_dup_edges(graph):_} Duplicate edges / {nx.number_of_selfloops(graph):_} Selfloops")
utils.log(degree_distr_str(graph))

# Save as "Edgelist" to support edge weights.
filename = graph_dir / "core.graph.edgelist.nx"
graph_tools.write_graph(graph, filename)
utils.log(f"Saved core to {str(filename)}")
basename = graph_dir / "topo"
filename = graph_tools.write_graph(graph, basename)
utils.log(f"Saved topo to {str(filename)}")

filename = graph_dir / "core.collapse.csv"
filename = graph_dir / "topo.collapse.csv"
with open(filename, "w") as f:
csv_out = csv.DictWriter(f, ["core_node", "sub_node"])
csv_out.writeheader()
Expand Down
4 changes: 2 additions & 2 deletions graph_make_bipartite.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ def main():
del df, child_edges, parent_edges
utils.log(f"Built graph with {len(graph.nodes):_} Nodes / {len(graph.edges):_} Edges")

filename = Path(graph_dir, "all.graph.adj.nx")
graph_tools.write_graph(graph, filename)
basename = Path(graph_dir, "all")
filename = graph_tools.write_graph(graph, basename)
utils.log(f"Saved graph to {str(filename)}")

utils.log("Finished")
Expand Down
4 changes: 2 additions & 2 deletions graph_make_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ def main():
graph = nx.from_pandas_edgelist(df, "child_id", "parent_id")
utils.log(f"Built graph with {len(graph.nodes):_} Nodes / {len(graph.edges):_} Edges")

filename = Path(graph_dir, "all.graph.adj.nx")
graph_tools.write_graph(graph, filename)
basename = Path(graph_dir, "all")
filename = graph_tools.write_graph(graph, basename)
utils.log(f"Saved graph to {str(filename)}")

utils.log("Finished")
Expand Down
49 changes: 31 additions & 18 deletions graph_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,36 +64,49 @@ def load_graph_nk(filename):
else:
raise Exception(f"Invalid graph filename: {filename}")

def load_graph(filename):
def load_graph(filename : Path) -> nx.Graph:
"""Load a graph from various formats depending on the extensions."""
filename = Path(filename)
if ".adj" in filename.suffixes:
return nx.read_adjlist(filename)

elif ".edgelist" in filename.suffixes:
return nx.read_weighted_edgelist(filename)
if ".multi" in filename.suffixes:
if ".di" in filename.suffixes:
g_type = nx.MultiDiGraph
else:
g_type = nx.MultiGraph
else:
if ".di" in filename.suffixes:
g_type = nx.DiGraph
else:
g_type = nx.Graph

if ".adj" in filename.suffixes:
return nx.read_adjlist(filename, create_using=g_type)

elif ".gml" in filename.suffixes:
return nx.read_gml(filename)
elif ".edges" in filename.suffixes:
return nx.read_weighted_edgelist(filename, create_using=g_type)

else:
raise Exception(f"Invalid graph filename: {filename}")

def write_graph(graph, filename):
"""Write a graph into various formats depending on the extension."""
filename = Path(filename)
if ".adj" in filename.suffixes:
assert not is_weighted(graph), "Writing adjlist drops weights!"
nx.write_adjlist(graph, filename)
def write_graph(graph : nx.Graph, basename : Path) -> Path:
"""Write a graph into various formats depending on Type."""
basename = str(basename)

if graph.is_directed():
basename += ".di"

if graph.is_multigraph():
basename += ".multi"

elif ".edgelist" in filename.suffixes:
if is_weighted(graph):
filename = Path(basename + ".weight.edges.nx")
nx.write_weighted_edgelist(graph, filename)

elif ".gml" in filename.suffixes:
nx.write_gml(graph, filename)
else: # Non-weighted
filename = Path(basename + ".adj.nx")
nx.write_adjlist(graph, filename)

else:
raise Exception(f"Invalid graph filename: {filename}")
return filename

def write_graph_nk(graph, names, filename):
filename = Path(filename)
Expand Down
14 changes: 7 additions & 7 deletions shapinsay_analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def build_bipartite_graph(user_nums, db):
graph_info.add_person(user_num)

graph = nx.Graph()
for id in graph_info.compute_node_ids():
for id in graph_info.compute_union_ids():
graph.add_node(id)
for (id1, id2) in graph_info.edge_ids:
graph.add_edge(id1, id2)
Expand Down Expand Up @@ -152,8 +152,8 @@ def main():
utils.log(f"Computed graph with {len(graph.nodes):_} nodes and "
f"{len(graph.edges):_} edges")

graph_tools.write_graph(graph, "results/shapinsay/full.graph.adj.nx")
utils.log("Wrote graph to results/shapinsay/full.graph.adj.nx")
filename = graph_tools.write_graph(graph, "results/shapinsay/full")
utils.log(f"Wrote graph to {str(filename)}")

components = sorted(nx.connected_components(graph), key = len, reverse=True)

Expand Down Expand Up @@ -191,8 +191,8 @@ def main():

print()
main = graph.subgraph(components[0])
graph_tools.write_graph(main, "results/shapinsay/main.graph.adj.nx")
utils.log("Wrote main component to results/shapinsay/main.graph.adj.nx")
filename = graph_tools.write_graph(main, "results/shapinsay/main")
utils.log(f"Wrote main component to {str(filename)}")

bicomps = sorted(nx.biconnected_components(main), key = len, reverse=True)

Expand Down Expand Up @@ -230,8 +230,8 @@ def main():

print()
main_bi = graph.subgraph(bicomps[0])
graph_tools.write_graph(main_bi, "results/shapinsay/main_bi.graph.adj.nx")
utils.log("Wrote main bi-component to results/shapinsay/main_bi.graph.adj.nx")
filename = graph_tools.write_graph(main_bi, "results/shapinsay/main_bi")
utils.log(f"Wrote main bi-component to {str(filename)}")

print()
utils.log(f"Circle sizes around {most_central[0]}:")
Expand Down

0 comments on commit c3f56b2

Please sign in to comment.