-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeatures_analyzer.py
60 lines (53 loc) · 2.03 KB
/
features_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from os import listdir
from os.path import isfile, join
import operator
# Directory that is scanned for feature-vector files when the module loads.
mypath = '../drebin/feature_vectors'
# Names of the regular files (sub-directories are filtered out) found directly
# inside ``mypath``.
onlyfiles = list(
    filter(lambda name: isfile(join(mypath, name)), listdir(mypath))
)
# Print iterations progress
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=50, fill='█'):
    """Draw a one-line text progress bar on standard output.

    The line starts and ends with a carriage return so that successive calls
    overwrite each other. A newline is emitted once ``iteration == total``.

    Args:
        iteration: Number of steps completed so far.
        total: Total number of steps. When it is 0 the bar is drawn as
            fully complete instead of raising ``ZeroDivisionError``.
        prefix: Text printed before the bar.
        suffix: Text printed after the percentage.
        decimals: Number of decimal places shown in the percentage.
        length: Width of the bar in characters.
        fill: Character used for the completed part of the bar; the
            remainder is padded with ``'-'``.
    """
    if total:
        fraction = iteration / float(total)
        filled_length = int(length * iteration // total)
    else:
        # Nothing to process: show a finished bar rather than dividing by zero.
        fraction = 1.0
        filled_length = length
    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filled_length + '-' * (length - filled_length)
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
    # Print New Line on Complete
    if iteration == total:
        print()
# Module-level mapping from a feature line to the number of times it was seen.
features = {}
def load_malwares(csv_path='../drebin/sha256_family.csv'):
    """Return the first column of every data row in the malware index CSV.

    The first line of the file is treated as a header and skipped. Each
    remaining non-blank line contributes the text before its first comma.
    The result is built in a fresh list on every call, so the function does
    not depend on (or mutate) any module-level variable.

    Args:
        csv_path: Path of the CSV file to read. Defaults to the location
            the script has always used.

    Returns:
        list[str]: One entry per data row, in file order.
    """
    with open(csv_path, 'r') as handle:
        content = handle.read().strip()
    # Row 0 is the header; blank rows carry no identifier and are dropped.
    rows = content.split('\n')[1:]
    return [row.split(',')[0] for row in rows if row.strip()]
def analyzer(lista_file, feature_dir='../drebin/feature_vectors', top_n=8):
    """Return the most frequent feature lines across a set of files.

    Every file named in ``lista_file`` is read from ``feature_dir``; each
    non-empty line is counted as one occurrence of a feature. Progress is
    reported through ``printProgressBar`` before each file is read.

    Counting is done in a local counter, so repeated calls are independent
    of each other and no module-level state is modified.

    Args:
        lista_file: File names (relative to ``feature_dir``) to analyse.
        feature_dir: Directory containing the feature files.
        top_n: Maximum number of features to return.

    Returns:
        list[str]: Up to ``top_n`` feature lines, most frequent first.
        Features with equal counts keep first-seen order. Fewer than
        ``top_n`` entries are returned when fewer distinct features exist,
        and an empty list when ``lista_file`` is empty.
    """
    # Nothing to read: avoid asking the progress bar to divide by zero.
    if not lista_file:
        return []

    from collections import Counter

    total = len(lista_file)
    counts = Counter()
    printProgressBar(0, total, 'Progress:', 'Complete')
    for done, name in enumerate(lista_file, start=1):
        printProgressBar(done, total, 'Progress:', 'Complete')
        with open(join(feature_dir, name), 'r') as handle:
            content = handle.read().strip()
        # An empty file would otherwise register '' as a feature.
        counts.update(line for line in content.split('\n') if line)
    return [feature for feature, _ in counts.most_common(top_n)]
if __name__ == "__main__":
    # Script entry point: load the list of sample names, then run the
    # feature-frequency analysis over them.
    lista_malware = []
    lista_malware = load_malwares()
    data, dic = [], []
    analyzer(lista_malware)