forked from uiocompcat/AABBA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathac_PT_multithread.py
145 lines (108 loc) · 5.24 KB
/
ac_PT_multithread.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
""""Script for apply autocorrelation function"""
from multiprocessing import Pool, cpu_count
import time
import os
from pathlib import Path
import csv
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from graph_info import node_info, metal_index, edge_info, vector_feature_PT, vector_feature_NBO
from ac_funtions import *
from utilities import round_csv, save_vectors, join_vectors, round_numbers
# Run settings shared by the feature-label lookup and the output file naming.
walk = 'ABBAavg'    # walk type: AA, BBavg, BB, AB, ABBA
depth_max = 8       # depth handed to the graph-walk and autocorrelation helpers
model_number = 3    # nBB model for PT (1, 2, 3)
ac_operator = 'MA'  # operator code; must be a key of the table built in read_graph
def read_graph(file, ac_operator='MA', model_number=3, depth_max=8,
               walk='ABBAavg', graph_dir=None):
    """Run the nBB autocorrelation on a single .gml graph.

    Only ``file`` is required, so the function can be mapped directly over
    a list of file names by ``multiprocessing.Pool.map``. The remaining
    arguments default to the settings that used to be hard-coded inside
    the body, so existing callers behave as before.

    Args:
        file: Name of the .gml file, relative to ``graph_dir``.
        ac_operator: Operator code; one of the keys of the operator table
            built below ('MA', 'FA', 'MD', 'FD', 'MR', 'FR', 'MS', 'FS').
        model_number: nBB model, (1, 2, 3) for GP and (4, 5) for NBO
            properties.
        depth_max: Depth forwarded to ``node_info``, ``edge_info`` and
            ``new_gp_bond_bond_MC``.
        walk: Walk label forwarded to ``vector_feature_PT``
            (AA, BBavg, BB, AB, ABBAavg, ABBA).
        graph_dir: Directory that contains the .gml files. When ``None``,
            ``~/Desktop/phd_stay_Project/Vaskas_project/baseline_graphs``
            is used.

    Returns:
        The second value returned by ``new_gp_bond_bond_MC``
        (``nBB_AC_vname``).
    """
    # operator code -> NumPy ufunc, handed to the autocorrelation helper
    comp_dict = {'MA': np.multiply,
                 'FA': np.multiply,
                 'MD': np.subtract,
                 'FD': np.subtract,
                 'MR': np.divide,
                 'FR': np.divide,
                 'MS': np.add,
                 'FS': np.add}

    # path to the gml file graphs
    if graph_dir is None:
        graph_dir = Path.home()/'Desktop/phd_stay_Project/Vaskas_project/baseline_graphs'

    # feature labels
    # NOTE(review): these labels are only consumed by the alternative walks
    # that are commented out below; the call is kept as in the original
    # because its side effects (if any) are not visible from this file.
    feature_set_PT = vector_feature_PT(depth_max, ac_operator, model_number, walk)
    feature_node, feature_edge, feature_node_depth, feature_edge_depth, \
        feature_new1_edge_depth, feature_new2_edge_depth, feature_new3_edge_depth = feature_set_PT

    # read the graph (separate local so the ``file`` argument is not rebound)
    gml_path = os.path.join(graph_dir, file)
    G = nx.read_gml(gml_path)

    # add feature_identity attribute to nodes and edges
    nx.set_node_attributes(G, 1, "feature_identity")
    nx.set_edge_attributes(G, 1, "feature_identity")

    # draw graphs
    #nx.draw(G, with_labels=True, font_weight='bold')
    #plt.show()

    # set the starting node
    indx = metal_index(G)

    # walk over the attributes
    node_dict = node_info(G, depth_max, indx)
    edge_dict = edge_info(G, depth_max, indx)

    # perform AC function (alternative walks are kept commented out so they
    # can be switched on again)
    #AA_AC_vector, AA_AC_vname = atom_atom_MC(G, indx, depth_max, node_dict, feature_node, ac_operator, comp_dict)
    #AA_FA_vector, AA_FA_vname = atom_atom_F(G, depth_max, feature_node, ac_operator, comp_dict)
    #BB_AC_vector, BB_AC_vname, BB_AC_vector_avg, BB_AC_vname_avg = bond_bond_MC(G, depth_max, edge_dict, feature_edge, ac_operator, comp_dict)
    #BB_FA_vector, BB_FA_vname = bond_bond_F(G, depth_max, feature_edge, ac_operator, comp_dict)
    #AB_AC_vector, AB_AC_vname = bond_atom_MC(G, indx, depth_max, edge_dict, feature_node, feature_edge, ac_operator, comp_dict)
    #AB_FA_vector, AB_FA_vname = bond_atom_F(G, depth_max, feature_node, feature_edge, ac_operator, comp_dict)
    nBB_AC_vector, nBB_AC_vname = new_gp_bond_bond_MC(G, depth_max, node_dict, edge_dict, model_number, ac_operator, comp_dict)
    #nBB_FA_vector, nBB_FA_vname = new_gp_bond_bond_F(G, depth_max, node_dict, model_number, ac_operator, comp_dict)

    return nBB_AC_vname
if __name__ == "__main__":
    # Script entry point: run read_graph over every .gml file in the graph
    # folder with a process pool, join the per-graph results, write them to
    # a .csv file and round the stored values.

    # time of execution
    start_time = time.time()

    # path to documents
    general_path = Path.home()/'Desktop/phd_stay_Project/'
    path_to_gml = general_path/'Vaskas_project/baseline_graphs'
    # NOTE(review): path_to_folder is not used below; results are written
    # to path_to_test.
    path_to_folder = general_path/f'Vaskas_project/PT_{walk}'
    path_to_disconnected = general_path/'excluded_graphs/disconnected.txt'
    path_to_test = general_path/'test'

    # exclude disconnected graphs (optional step, currently switched off)
    #disconnected_graphs = []
    #with open(path_to_disconnected) as f:
    #    contents = f.readlines()
    #    for i in contents:
    #        disconnected_graphs.append(i.strip('\n'))

    # store gml_files (file names only; read_graph prepends the folder)
    gml_list = [name for name in os.listdir(path_to_gml)
                if name.endswith('.gml')]

    # save maximum depth (optional step, currently switched off)
    #get_max_depth(gml_list)

    # create a process pool; leave four CPUs free, but always keep at least
    # one worker so the pool is valid on small machines
    n_workers = max(1, cpu_count() - 4)
    with Pool(processes=n_workers) as pool:
        poolReturn = pool.map(read_graph, gml_list)

    feature_set_PT = vector_feature_PT(depth_max, ac_operator, model_number, walk)

    # unpack the feature labels
    feature_node, feature_edge, feature_node_depth, feature_edge_depth, \
        feature_new1_edge_depth, feature_new2_edge_depth, feature_new3_edge_depth = feature_set_PT

    # select the feature type
    feature_type = feature_new3_edge_depth

    # save derived vectors in a .csv file
    print('Save vectors in a .csv file')

    # join the vectors and save vectors
    out_dict = join_vectors(poolReturn, feature_type)
    save_vectors(path_to_test, out_dict, depth_max, ac_operator, walk, model_number)

    # round features of the csv
    round_csv(path_to_test, depth_max, ac_operator, walk, model_number)

    # report the elapsed time; the CPU count is labelled so it no longer
    # runs straight into the preceding sentence
    elapsed_minutes = round((time.time() - start_time)/60, 4)
    print(f"Execution time: {elapsed_minutes} minutes. CPU count: {cpu_count()}")