forked from dgg32/transfer_kg
-
Notifications
You must be signed in to change notification settings - Fork 0
/
import_supplement_node.py
71 lines (48 loc) · 2.17 KB
/
import_supplement_node.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import json
import sys, os
from neo4j import GraphDatabase
import pandas as pd
# Usage: python import_supplement_node.py <csv_file> <ip> <password> <node_type>
class import_data:
    """Load the rows of a CSV file into Neo4j as nodes of a single label.

    Every CSV row is merged into one node identified by its ``name`` column;
    all remaining columns are written as properties of that node.
    """

    # Number of rows written per transaction before it is committed.
    _BATCH_SIZE = 100

    def __init__(self, uri, user, password):
        """Create a driver for *uri* authenticated with *user* / *password*."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    @staticmethod
    def _merge_statement(node_type, row, columns):
        """Build the parameterised Cypher MERGE for one CSV row.

        Args:
            node_type: Label given to the node.
            row: Mapping from column name to cell value; must contain
                ``name``.
            columns: Iterable with the names of all columns of the CSV.

        Returns:
            A ``(query, parameters)`` pair to hand to ``tx.run``.

        Raises:
            KeyError: If *row* has no ``name`` entry.
        """
        # A label cannot be supplied as a query parameter, so it is quoted
        # with backticks and any embedded backtick is doubled.
        label = str(node_type).replace("`", "``")
        # Every value is stored as text (``str(value)``), matching the quoted
        # literals the query used to embed; passing them as parameters means
        # quotes and backslashes in the data no longer need escaping.
        props = {key: str(row[key]) for key in columns if key != "name"}
        # ``SET a += $props`` writes the same properties whether the node was
        # just created or already existed, and is valid when props is empty.
        query = f"MERGE (a:`{label}` {{name: $name}}) SET a += $props"
        return query, {"name": str(row["name"]), "props": props}

    def add_nodes_connections(self, filename, node_type):
        """Merge every row of the CSV file *filename* as a *node_type* node.

        Rows are written in batches of ``_BATCH_SIZE`` per transaction.

        Args:
            filename: Path of a comma-separated file with a ``name`` column.
            node_type: Label applied to every merged node.

        Raises:
            KeyError: If the file has no ``name`` column.
            ValueError: If a row has an empty ``name`` cell.
        """
        # Parse the file first so an unreadable CSV fails before a session
        # is opened.
        df = pd.read_csv(filename, sep=',')
        columns = list(df.columns)

        with self.driver.session() as session:
            tx = session.begin_transaction()
            try:
                for count, (index, row) in enumerate(df.iterrows(), start=1):
                    if pd.isna(row["name"]):
                        raise ValueError(
                            f"{filename}: row {index} has no 'name' value"
                        )
                    query, parameters = self._merge_statement(
                        node_type, row, columns
                    )
                    tx.run(query, parameters)
                    if count % self._BATCH_SIZE == 0:
                        tx.commit()
                        tx = session.begin_transaction()
                # Commit whatever is left in the last, partially filled batch.
                tx.commit()
            finally:
                # Rolls back a transaction left open by an error; does
                # nothing once the transaction has been committed.
                tx.close()
# Command-line arguments, in order: CSV file, server IP, password, node label.
import_csv_file = sys.argv[1]
ip = sys.argv[2]
password = sys.argv[3]
node_type = sys.argv[4]

connection = import_data(f"bolt://{ip}:7687", "neo4j", password)
try:
    connection.add_nodes_connections(import_csv_file, node_type)
finally:
    # Close the driver even when the import raises.
    connection.close()