Skip to content

Commit 24bb7b2

Browse files
committed
add some codeql-script
1 parent 7571cff commit 24bb7b2

File tree

3 files changed

+622
-0
lines changed

3 files changed

+622
-0
lines changed

codeql-script/auto_detect.py

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
from genericpath import isfile
2+
import os
3+
import sys
4+
from os.path import join, getsize
5+
# import openpyxl
6+
# from openpyxl import Workbook
7+
# from openpyxl import load_workbook
8+
import time
9+
import json
10+
11+
def get_time():
    """Return the current local time as 'YYYY-MM-DD-HH_MM_SS' plus a trailing newline."""
    return '{}\n'.format(time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime()))
14+
15+
def get_data_json(in_path):
    """Load a JSON-lines file (one JSON document per line).

    Blank lines are skipped, so a trailing newline at end-of-file no longer
    raises json.JSONDecodeError (the original crashed on empty lines).

    Args:
        in_path: path to the JSON-lines input file.

    Returns:
        List of the decoded objects, in file order.
    """
    out_list = []
    with open(in_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank/trailing lines
            out_list.append(json.loads(line))
    return out_list
24+
25+
# def get_data_excel(in_path):
26+
# wb= openpyxl.load_workbook(in_path)
27+
# # 第二步选取表单
28+
# sheet = wb.active
29+
# # 按行获取数据转换成列表
30+
# rows_data = list(sheet.rows)
31+
# # 获取表单的表头信息(第一行),也就是列表的第一个元素
32+
# titles = [title.value for title in rows_data.pop(0)]
33+
# # print(titles)
34+
35+
# # 整个表格最终转换出来的字典数据列表
36+
# all_row_dict = []
37+
# # 遍历出除了第一行的其他行
38+
# for a_row in rows_data:
39+
# the_row_data = [cell.value for cell in a_row]
40+
# # 将表头和该条数据内容,打包成一个字典
41+
# row_dict = dict(zip(titles, the_row_data))
42+
# # print(row_dict)
43+
# all_row_dict.append(row_dict)
44+
# return all_row_dict
45+
46+
def get_file_size(filepath):
    """Return the size of *filepath* in bytes.

    A directory is measured as the sum of every file beneath it
    (recursively); a regular file reports its own size.  A path that is
    neither (e.g. a broken symlink) yields 0.  If the path does not exist
    at all, a message is printed and the process exits with status 1.
    """
    if not os.path.exists(filepath):
        print(filepath + ' not exists!')
        exit(1)
    total = 0
    if os.path.isdir(filepath):
        # Walk the whole tree and accumulate every file's size.
        for root, _dirs, names in os.walk(filepath):
            total += sum(getsize(join(root, name)) for name in names)
    elif os.path.isfile(filepath):
        total += os.path.getsize(filepath)
    return total
74+
75+
def if_big(database_path):
    """Return True when the database at *database_path* exceeds 500 MB.

    Prints the path and the measured size (in MB) as a progress trace.
    """
    print(database_path)
    size_mb = get_file_size(database_path) / (1024 * 1024)
    print(str(size_mb) + 'M')
    return size_mb > 500
83+
84+
def gen_ql_code(ql_dir, big_flag, database):
    """Generate a CodeQL query file for one malloc/free API pair.

    The query is assembled from three pieces found in *ql_dir*:
    ``prefix.ql`` + generated source/sink predicates + ``after.ql``
    (``big_after.ql`` instead when *big_flag* is set, i.e. for large
    databases), followed by the final ``select`` clause.

    Args:
        ql_dir: directory holding prefix.ql / after.ql / big_after.ql;
            the generated query file is written here as well.
        big_flag: True selects the "big database" query tail.
        database: dict with keys 'malloc_api', 'free_api',
            'malloc_index', 'free_index' describing the API pair.

    Returns:
        Path of the generated .ql file.
    """
    ql_name = database['malloc_api'] + '-' + database['free_api'] + '.ql'
    ql_path = ql_dir + '/' + ql_name
    ql_prefix = ql_dir + '/prefix.ql'
    ql_after = ql_dir + ('/big_after.ql' if big_flag else '/after.ql')

    source_exp = int(database['malloc_index'])
    sink_exp = int(database['free_index'])
    sourceFC = database['malloc_api'].strip(' ')
    sinkFC = database['free_api'].strip(' ')

    # BUG FIX: the original set ifflag = 'false' — a non-empty (hence truthy)
    # string — so `if ifflag:` always took the true branch and the generated
    # ifTestFlag() predicate always returned true.  Use a real boolean so the
    # literal 'false' the author wrote is what actually gets emitted.
    ifflag = False
    flag = 'true' if ifflag else 'false'

    print(ql_name)
    # A malloc_index of -1 means the allocated value is the call expression
    # itself rather than one of its arguments (sqlite3_open-style APIs).
    if source_exp == -1:
        change_sourcefc = '''
Expr getSourceExpr(FunctionCall fc)
{
result = fc //sqlite3_open
}
'''
    else:
        change_sourcefc = '''
Expr getSourceExpr(FunctionCall fc)
{
result = fc.getArgument(''' + str(source_exp) + ''')
}
'''

    change_code = change_sourcefc + '''
Expr getSinkExp(FunctionCall fc)
{
result = fc.getArgument(''' + str(sink_exp) + ''')
}

predicate isSourceFC(FunctionCall fc)
{
fc.getTarget().hasName("''' + sourceFC + '''")
}

predicate isSinkFC(FunctionCall fc)
{
fc.getTarget().hasName("''' + sinkFC + '''")
}
boolean ifTestFlag()
{
result = ''' + flag + '''
}
'''
    final_code = 'select malloc, malloc.getLocation().toString()\n'
    with open(ql_prefix, 'r') as f:
        prefix = f.read()
    with open(ql_after, 'r') as f:
        after = f.read()
    with open(ql_path, 'w') as f:
        f.write(prefix + change_code + after + final_code)
    return ql_path
183+
184+
def sort_by_size(database_list, database_dir):
    """Annotate each database entry with its on-disk size and sort ascending.

    Each dict in *database_list* gains a 'size' key: the size in megabytes
    of the CodeQL database at <database_dir>/all-database/<repo>.  The list
    is returned smallest-first so quick analyses run before slow ones.

    Replaces the original O(n^2) repo-matching loop (which also collapsed
    duplicate 'repo' names through an intermediate dict) with one sort.

    Args:
        database_list: list of dicts, each with at least a 'repo' key.
        database_dir: root directory containing 'all-database/'.

    Returns:
        New list of the same dicts, ordered by ascending size.
    """
    for data in database_list:
        database_path = database_dir + '/all-database' + '/' + data['repo']
        # get_file_size() returns bytes; store megabytes.
        data['size'] = get_file_size(database_path) / (1024 * 1024)
    out_list = sorted(database_list, key=lambda d: d.get('size', 0))
    # Progress trace: the sorted (repo, size) pairs, then one line per repo.
    print([{'repo': d['repo'], 'size': d['size']} for d in out_list])
    for item in out_list:
        print(item['repo'] + ' size: ' + str(item['size']))
    return out_list
212+
213+
if __name__ == '__main__':
    # Usage: python3 ./auto_detect.py <codeql_dir> <findres> <output_dir> <database_dir>
    #   codeql_dir   : root containing vscode-codeql-starter/
    #   findres      : JSON-lines file describing malloc/free API pairs per repo
    #   output_dir   : where per-pair CSV results and the detect_log are written
    #   database_dir : root containing all-database/<repo> CodeQL databases
    if len(sys.argv) != 5:
        print('wrong input arg')
        print('Usage: python3 ./auto_detect.py <codeql_dir> <findres> <output_dir> <database_dir>')
        exit(1)

    codeql_dir = sys.argv[1] + '/'
    in_path = sys.argv[2]
    out_dir = sys.argv[3] + '/'
    database_dir = sys.argv[4] + '/'
    ql_dir = codeql_dir + '/vscode-codeql-starter/codeql-custom-queries-cpp/'
    log_path = out_dir + '/detect_log'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # Smallest databases first, so fast analyses finish before slow ones.
    data = get_data_json(in_path)
    data = sort_by_size(data, database_dir)

    out_csv_list = list()
    for database in data:
        database_name = database['repo']
        database_path = database_dir + '/all-database' + '/' + database_name
        # Databases over 550 MB use the "big" query variant.
        big_flag = database['size'] > 550

        ql_path = gen_ql_code(ql_dir, big_flag, database)

        # FIX: the original baked a trailing space into the .csv path (it
        # doubled as the separator in the shell command, so every stored path
        # ended in ' ') and omitted the '-' between repo and API names in the
        # non-big case.  Build a clean, consistently named path and add the
        # command separator explicitly below.
        pair = database['malloc_api'] + '-' + database['free_api']
        if big_flag:
            out_csv_path = out_dir + 'big_' + database_name + '-' + pair + '.csv'
        else:
            out_csv_path = out_dir + database_name + '-' + pair + '.csv'

        # NOTE(review): the command is built by string concatenation and run
        # via os.system; paths with spaces or shell metacharacters would break
        # or be shell-interpreted.  Inputs come from argv here, so this is
        # kept, but subprocess.run([...], shell=False) would be safer.
        codeql_analyze = ('codeql database analyze ' + database_path +
                          ' --rerun --threads=4 --format=csv --output=' +
                          out_csv_path + ' ' + ql_path)
        with open(log_path, 'a') as f:
            f.write(get_time() + 'Execute codeql analyze:\n' + codeql_analyze + '\n')
        start = time.time()
        os.system(codeql_analyze)
        end = time.time()
        with open(log_path, 'a') as f:
            f.write('Total time: ' + str(end - start) + 's\n\n')
        print(codeql_analyze)
        # The generated query is per-pair scratch; drop it once the run is done.
        os.remove(ql_path)
        out_csv_list.append(out_csv_path)

0 commit comments

Comments
 (0)