-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1_good_setting_test-val.py
171 lines (124 loc) · 5.31 KB
/
1_good_setting_test-val.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import sys
import os
import json
import pandas as pd
import time
import datetime
#*********************************************************************************
#*
#* This program searches all json files under the "val" folder and groups them by behavior label combination.
#* For each label combination, it selects the json file whose OOB > 90% and whose macro f1 is the best, and saves its "setting".
#* That "setting" is then used to find the json files under the "test" folder that have the same setting.
#* Finally, the selected results are grouped into a folder (best_combinations_test) which is under the "test" folder.
#*
#*
#**********************************************************************************
#* create a list that includes all paths of log files
def all_log_list(path):
    """Return the paths of all ``.log`` files found recursively under *path*.

    Every space character is removed from *path*, and a trailing '/' is
    appended when missing, before the tree is walked.  Each returned entry
    is ``dirpath + '/' + filename``; because the root passed to ``os.walk``
    already ends with '/', files located directly in the root carry a
    doubled slash.
    """
    root = path.replace(" ", "")
    if not root.endswith('/'):
        root += '/'
    return [
        folder + '/' + name
        for folder, _subdirs, names in os.walk(root)
        for name in names
        if name.endswith(".log")
    ]
#* create a list that includes all paths of json files
def all_json_list(path):
    """Return the paths of all ``.json`` files found recursively under *path*.

    Spaces are stripped out of *path* and a trailing '/' is added if it is
    not already there.  Results are built as ``dirpath + '/' + filename``,
    so a file sitting directly in the root ends up with a doubled slash
    (the root handed to ``os.walk`` already ends in '/').
    """
    base = path.replace(" ", "")
    base = base if base.endswith('/') else base + '/'
    found = []
    for current_dir, _dirs, entries in os.walk(base):
        found.extend(
            current_dir + '/' + entry
            for entry in entries
            if entry.endswith(".json")
        )
    return found
#* take the last item of the path as the folder name
def get_folder_name(path):
    """Return the text that follows the last '/' in *path*.

    The whole string is returned when *path* has no '/', and an empty
    string when *path* ends with '/'.
    """
    _head, _sep, tail = path.rpartition('/')
    return tail
def get_setting_from_name(folder, num):
    """Return field number *num* of *folder* after splitting it on '_'.

    Raises IndexError when *folder* has no field at that position.
    """
    fields = folder.split('_')
    return fields[num]
#* create a dict whose key is the selected behavior label and whose value is the list of paths of all corresponding json files
#* ex {label-1 : [./xxx/xxx/label-1.json, ./xxx/xxx/label-1.json, ./xxx/xxx/label-1.json] }
def collected_file_into_label_dict(file_list, label_vs_list):
    """Group the paths in *file_list* by label, in place, into *label_vs_list*.

    The label of a path is the second '_'-separated field of its last path
    component.  *label_vs_list* is mutated: each label maps to the list of
    paths that carry it, in the order they appear in *file_list*.  Nothing
    is returned.
    """
    for json_path in file_list:
        name = get_folder_name(json_path)
        key = get_setting_from_name(name, 1)
        # setdefault creates the bucket the first time a label is seen.
        label_vs_list.setdefault(key, []).append(json_path)
#* get all ML setting from file name
def get_setting(file_path):
    """Return the ML setting encoded in the last component of *file_path*.

    The last path component is split on '_' and fields 2 through 6 (leaf,
    feature, estimator, max depth and split, in that order) are joined back
    together with '_'.  Field 0 (the family) and field 1 (the label) are
    not part of the setting.

    Raises IndexError when the name has fewer than seven '_'-separated
    fields.
    """
    # Split the name once instead of once per field.
    fields = os.path.basename(file_path).split('_')
    # Index explicitly (rather than slicing) so a too-short name still
    # raises IndexError instead of silently yielding a truncated setting.
    return '_'.join(fields[index] for index in range(2, 7))
#* to calculate the macro-f1
def get_aver_of_dict(j_dict):
    """Return the arithmetic mean of the values of *j_dict* (the macro-F1).

    *j_dict* maps keys to numeric scores; only the values are used.  An
    empty mapping yields 0.0 instead of raising ZeroDivisionError, so a
    result file with no per-class scores ranks last rather than aborting
    the whole scan.
    """
    if not j_dict:
        return 0.0
    return sum(j_dict.values()) / len(j_dict)
def down_level_for_path(path):
    """Return *path* with its last component removed, keeping the trailing '/'.

    A single trailing '/' on *path* is ignored first.  The result is
    everything up to and including the last remaining '/', or an empty
    string when there is none (e.g. 'a/b/c.json' -> 'a/b/', 'c.json' -> '').
    """
    trimmed = path[:-1] if path.endswith('/') else path
    # rfind gives -1 when no '/' is left, which turns the slice into ''.
    return trimmed[:trimmed.rfind('/') + 1]
if __name__ == "__main__":
    # Local import: only this entry point launches the external `cp` command.
    import subprocess

    # Usage: <script> <val_folder> <test_folder>
    if len(sys.argv) < 3:
        sys.exit('usage: %s <val_folder> <test_folder>' % sys.argv[0])
    path = sys.argv[1]
    path_2 = sys.argv[2]

    log_list = all_log_list(path)
    json_list = all_json_list(path)
    print(log_list)

    # Group the json files of the val folder by label.
    label_vs_all_log = {}
    collected_file_into_label_dict(json_list, label_vs_all_log)

    # For each label, keep only the val json files whose OOB is above 0.90,
    # compute their macro f1 (mean of Each_f1/Round_1), and remember the one
    # with the best macro f1 as (macro_f1, setting, json_path).
    best = {}
    for label in label_vs_all_log:
        one_label = []
        for json_path in label_vs_all_log[label]:
            if not json_path.endswith('.json'):
                continue
            # `with` closes the file even when json.load raises.
            with open(json_path) as j:
                j_data = json.load(j)
            aver_f1 = get_aver_of_dict(j_data['Each_f1']['Round_1'])
            OOB = j_data['All_parameter']['For_all']['OOB']
            if OOB > 0.90:
                one_label.append((aver_f1, get_setting(json_path), json_path))
        if not one_label:
            # No candidate passed the OOB threshold: skip the label instead
            # of failing on an empty list.
            print('no json file with OOB > 0.90 for label %s, skipped' % label)
            continue
        # Tuples compare by macro f1 first; ties fall back to setting, then path.
        best[label] = max(one_label)
    print(best)

    # Copy the folder holding each best val json file into best_combinations_val.
    # os.mkdir raises FileExistsError if the folder is already there.
    best_folder = path+'/'+'best_combinations_val/'
    os.mkdir(best_folder)
    for key in best:
        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters are passed to cp unchanged.
        subprocess.run(['cp', '-r', down_level_for_path(best[key][2]),
                        best_folder+key+'_'+get_setting(best[key][2])])

    # Apply the settings selected on val to the test folder: for every label,
    # copy the folder of each test json file whose setting equals the best
    # val setting into best_combinations_test/<label>/.
    label_vs_all_log_2 = {}
    json_list_2 = all_json_list(path_2)
    collected_file_into_label_dict(json_list_2, label_vs_all_log_2)
    best_folder_2 = path_2+'/'+'best_combinations_test/'
    os.mkdir(best_folder_2)
    for label in label_vs_all_log_2:
        if label not in best:
            # The label exists only in the test folder, or had no val
            # candidate above the OOB threshold.
            print('no selected val setting for label %s, skipped' % label)
            continue
        good_setting = best[label][1]
        print(good_setting)
        print(label)
        for json_path in label_vs_all_log_2[label]:
            if get_setting(json_path) == good_setting:
                source = down_level_for_path(json_path)
                target = best_folder_2+label+'/'
                subprocess.run(['cp', '-r', source, target])
                print('cp -r %s %s'%(source, target))