Skip to content

Commit 136bcf9

Browse files
committed
Changed to reflect new naming scheme for relations.
1 parent e75a0cd commit 136bcf9

File tree

1 file changed

+44
-43
lines changed

1 file changed

+44
-43
lines changed

load_gexf_to_neo4j.py

+44-43
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#
1111
# Copyright (C) 2012 ISI Foundation
1212
# written by Ciro Cattuto <[email protected]>
13+
# and Andre' Panisson <[email protected]>
1314
#
1415
# This program is free software: you can redistribute it and/or modify
1516
# it under the terms of the GNU General Public License as published by
@@ -73,23 +74,23 @@ def get_intervals(tstart, tstop):
7374
nodes = graph.find('{http://www.gexf.net/1.2draft}nodes')
7475

7576
for node in nodes.findall('{http://www.gexf.net/1.2draft}node'):
76-
tag_id = int(node.get('id'))
77-
if not tag_id in NODE_TIMELINE:
78-
NODE_TIMELINE[tag_id] = set()
77+
node_id = int(node.get('id'))
78+
if not node_id in NODE_TIMELINE:
79+
NODE_TIMELINE[node_id] = set()
7980
for spell in node.findall('{http://www.gexf.net/1.2draft}spells/{http://www.gexf.net/1.2draft}spell'):
8081
t1, t2 = int(spell.get('start')), int(spell.get('end'))
81-
NODE_TIMELINE[tag_id].update( get_intervals(t1, t2) )
82+
NODE_TIMELINE[node_id].update( get_intervals(t1, t2) )
8283

8384
EDGE_TIMELINE = {}
8485
edges = graph.find('{http://www.gexf.net/1.2draft}edges')
8586

8687
for edge in edges.findall('{http://www.gexf.net/1.2draft}edge'):
87-
tag1, tag2= int(edge.get('source')), int(edge.get('target'))
88-
if not (tag1,tag2) in EDGE_TIMELINE:
89-
EDGE_TIMELINE[(tag1,tag2)] = set()
88+
node1, node2 = int(edge.get('source')), int(edge.get('target'))
89+
if not (node1,node2) in EDGE_TIMELINE:
90+
EDGE_TIMELINE[(node1,node2)] = set()
9091
for spell in edge.findall('{http://www.gexf.net/1.2draft}spells/{http://www.gexf.net/1.2draft}spell'):
9192
t1, t2 = int(spell.get('start')), int(spell.get('end'))
92-
EDGE_TIMELINE[(tag1,tag2)].update( get_intervals(t1, t2) )
93+
EDGE_TIMELINE[(node1,node2)].update( get_intervals(t1, t2) )
9394

9495
FRAMES = set()
9596
for interval_list in NODE_TIMELINE.values() + EDGE_TIMELINE.values():
@@ -144,7 +145,7 @@ def add_to_timeline(root_node, node, timestamp):
144145

145146
gdb = GraphDatabase(NEO4J_REST)
146147

147-
tagsidx = gdb.nodes.indexes.create(name="tags_%s" % RUN_NAME, type="fulltext")
148+
actorsidx = gdb.nodes.indexes.create(name="actors_%s" % RUN_NAME, type="fulltext")
148149

149150
REF_NODE = gdb.node[0]
150151
RUN = gdb.node(name=RUN_NAME, type='RUN')
@@ -153,16 +154,16 @@ def add_to_timeline(root_node, node, timestamp):
153154
TLINE = gdb.node(name='TIMELINE', type='TIMELINE', start=START_TIME, stop=STOP_TIME)
154155
RUN.relationships.create("HAS_TIMELINE", TLINE)
155156

156-
TAG_DICT = {}
157-
EDGE_DICT = {}
157+
ACTOR_DICT = {}
158+
INTERACTION_DICT = {}
158159

159160
frame_count = 0
160161
prev_frame = None
161162

162-
tags = set()
163-
edges = set()
164-
frame_tags = []
165-
frame_edges = []
163+
actors = set()
164+
interactions = set()
165+
frame_actors = []
166+
frame_interactions = []
166167

167168
tx = gdb.transaction()
168169

@@ -186,12 +187,12 @@ def add_to_timeline(root_node, node, timestamp):
186187
prev_frame.relationships.create("FRAME_NEXT", frame)
187188
prev_frame = frame
188189

189-
for tag_id in NODE_TIMELINE:
190-
if not interval in NODE_TIMELINE[tag_id]:
190+
for actor_id in NODE_TIMELINE:
191+
if not interval in NODE_TIMELINE[actor_id]:
191192
continue
192-
tags.add(tag_id)
193+
actors.add(actor_id)
193194

194-
frame_tags.append((frame, tag_id))
195+
frame_actors.append((frame, actor_id))
195196

196197
for (id1, id2) in EDGE_TIMELINE:
197198
if not interval in EDGE_TIMELINE[(id1,id2)]:
@@ -200,51 +201,51 @@ def add_to_timeline(root_node, node, timestamp):
200201
if id1 > id2:
201202
(id1, id2) = (id2, id1)
202203

203-
edges.add((id1,id2))
204+
interactions.add((id1,id2))
204205

205-
frame_edges.append((frame, (id1,id2)))
206+
frame_interactions.append((frame, (id1,id2)))
206207

207208
tx.commit()
208209

209210
with gdb.transaction():
210-
print 'Adding %d tag nodes' % len(tags)
211-
for tag_id in tags:
212-
tag = gdb.node(name='TAG_%04d' % tag_id, type='TAG', tag=tag_id)
213-
tagsidx.add('tag_id', tag_id, tag)
214-
TAG_DICT[tag_id] = tag
215-
RUN.relationships.create("RUN_TAG", tag)
216-
217-
print 'Adding %d edge nodes' % len(edges)
218-
for (id1,id2) in edges:
219-
edge = gdb.node(name='EDGE_%04d_%04d' % (id1, id2), type='EDGE', tag1=id1, tag2=id2)
220-
EDGE_DICT[(id1,id2)] = edge
221-
tag1 = TAG_DICT[id1]
222-
tag2 = TAG_DICT[id2]
223-
edge.relationships.create("EDGE_TAG", tag1)
224-
edge.relationships.create("EDGE_TAG", tag2)
225-
RUN.relationships.create("RUN_EDGE", edge)
211+
print 'Adding %d ACTOR nodes' % len(actors)
212+
for actor_id in actors:
213+
actor = gdb.node(name='ACTOR_%04d' % actor_id, type='ACTOR', actor=actor_id)
214+
actorsidx.add('actor_id', actor_id, actor)
215+
ACTOR_DICT[actor_id] = actor
216+
RUN.relationships.create("RUN_ACTOR", actor)
217+
218+
print 'Adding %d INTERACTION nodes' % len(interactions)
219+
for (id1,id2) in interactions:
220+
edge = gdb.node(name='INTERACTION_%04d_%04d' % (id1, id2), type='INTERACTION', actor1=id1, actor2=id2)
221+
INTERACTION_DICT[(id1,id2)] = edge
222+
actor1 = ACTOR_DICT[id1]
223+
actor2 = ACTOR_DICT[id2]
224+
edge.relationships.create("INTERACTION_ACTOR", actor1)
225+
edge.relationships.create("INTERACTION_ACTOR", actor2)
226+
RUN.relationships.create("RUN_INTERACTION", edge)
226227

227228
tx = gdb.transaction(update=False)
228-
print 'Adding %d tag relations to frames' % len(frame_tags)
229-
for i, (frame, tag_id) in enumerate(frame_tags):
229+
print 'Adding %d ACTOR relations to frames' % len(frame_actors)
230+
for i, (frame, actor_id) in enumerate(frame_actors):
230231
if (i+1) % 1000 == 0:
231232
sys.stdout.write('.')
232233
sys.stdout.flush()
233234
tx.commit()
234235
tx = gdb.transaction(update=False)
235-
frame.relationships.create("FRAME_TAG", TAG_DICT[tag_id])
236+
frame.relationships.create("FRAME_ACTOR", ACTOR_DICT[actor_id])
236237
tx.commit()
237238
print
238239

239240
tx = gdb.transaction(update=False)
240-
print 'Adding %d edge relations to frames' % len(frame_edges)
241-
for i, (frame, edge) in enumerate(frame_edges):
241+
print 'Adding %d INTERACTION relations to frames' % len(frame_interactions)
242+
for i, (frame, interaction) in enumerate(frame_interactions):
242243
if (i+1) % 1000 == 0:
243244
sys.stdout.write('.')
244245
sys.stdout.flush()
245246
tx.commit()
246247
tx = gdb.transaction(update=False)
247-
frame.relationships.create("FRAME_EDGE", EDGE_DICT[edge], weight=1)
248+
frame.relationships.create("FRAME_INTERACTION", INTERACTION_DICT[interaction], weight=1)
248249
tx.commit()
249250
print
250251

0 commit comments

Comments
 (0)