-
Notifications
You must be signed in to change notification settings - Fork 10
/
getnetguids.py
326 lines (274 loc) · 15.1 KB
/
getnetguids.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
#!/usr/bin/env python2
import pefile
import struct
import re
import datetime
import csv
import hashlib
# Compiled pattern for the textual GUID form: five dash-separated groups of
# 8, 4, 4, 4 and 12 hexadecimal digits (upper or lower case). The pattern
# itself carries no start/end anchors.
guid_regex = re.compile("[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}")
def format_guid_from_hex(hex_string):
    """Turn a hex dump of a GUID into its dashed text representation.

    The input is read as two-character pairs. The pairs making up the first
    three groups (4, 2 and 2 pairs) are emitted in reverse order; the last
    two groups are copied straight from the input (characters 16-19 and 20
    onwards).

    :param hex_string: hexadecimal text, normally 32 characters long.
    :return: the five groups joined by dashes.
    """
    # Only the first eight pairs (16 characters) are ever reordered.
    pairs = [hex_string[pos:pos + 2] for pos in range(0, 16, 2)]
    data1 = "".join(reversed(pairs[0:4]))
    data2 = "".join(reversed(pairs[4:6]))
    data3 = "".join(reversed(pairs[6:8]))
    data4 = hex_string[16:20]
    data5 = hex_string[20:]
    return "{0}-{1}-{2}-{3}-{4}".format(data1, data2, data3, data4, data5)
def read_blob(blob):
    """Pull the string payload out of a short, length-prefixed blob.

    The first character of ``blob`` is treated as a one-byte length when its
    high bit is clear. From the payload of that length the first two and the
    last two characters are dropped, followed by one more leading character.
    (Presumably these are the custom-attribute prolog, the named-argument
    count and the string length byte -- verify against ECMA-335.)

    :param blob: blob heap data starting at the blob's length prefix.
    :return: the extracted string, or "" when the blob is empty, carries no
             payload, or uses a multi-byte length prefix (not supported).
    """
    if not blob:
        return ""
    length_prefix = ord(blob[0])
    if length_prefix & 0x80:
        # High bit set means a multi-byte length prefix; such blobs are
        # deliberately not decoded here.
        return ""
    payload = blob[1:1 + length_prefix]
    # Strip two leading and two trailing characters, then one more leading
    # character; slicing an empty string simply stays empty.
    return payload[2:-2][1:]
def is_dot_net_assembly(pe):
    """Report whether a parsed PE file declares a CLR (.NET) header.

    :param pe: object exposing ``OPTIONAL_HEADER.DATA_DIRECTORY`` as a
               sequence of entries with a ``VirtualAddress`` attribute
               (as ``pefile.PE`` does).
    :return: True when data directory entry 14 has a non-zero
             VirtualAddress; False otherwise, including when the directory
             table is too short to contain that entry.
    """
    clr_directory_index = 14  # the COM descriptor / CLR runtime header slot
    directories = pe.OPTIONAL_HEADER.DATA_DIRECTORY
    # A PE with a truncated data-directory table cannot carry a CLR header;
    # answer False instead of letting an IndexError escape to the caller.
    if len(directories) <= clr_directory_index:
        return False
    return directories[clr_directory_index].VirtualAddress != 0
def get_assembly_guids(assembly_path):
    """Extract the MVID, typelib id and compile time from a .NET assembly.

    The file is parsed with pefile only far enough to locate the ``.text``
    section and the metadata offset; the metadata itself is found by looking
    for the "BSJB" signature so that malformed assemblies are still handled.
    This function targets Python 2 (byte strings, ``xrange``, hex codec).

    :param assembly_path: path of the file to inspect.
    :return: ``None`` when the file is not a PE or not a .NET assembly;
             ``{}`` when no usable metadata could be parsed; otherwise a dict
             with ``"mvid"`` and ``"compiled"`` and, when an assembly-level
             attribute whose value looks like a GUID is found,
             ``"typelib_id"``.
    """
    metadata_signature = "\x42\x53\x4a\x42"  # "BSJB"
    try:
        try:
            pe = pefile.PE(assembly_path)
            txt_start = None
            txt_end = None
            # The last section named ".text" wins, as no break is taken.
            for section in pe.sections:
                if section.Name.startswith(".text\x00"):
                    txt_start = section.PointerToRawData
                    txt_end = txt_start + section.SizeOfRawData
        except pefile.PEFormatError:
            return None
        if not is_dot_net_assembly(pe):
            return None
        if txt_start is None:
            # Without a .text section there is nothing to search; previously
            # this case only ended up here through an incidental TypeError.
            return {}

        # Compile TimeDateStamp. Computed inline so the value is available
        # even when this module is imported rather than run as a script.
        try:
            compiled = datetime.datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp)
        except Exception:
            compiled = "Error"

        # Removed strict parsing and opted for simple searching method to
        # support malformed assemblies
        with open(assembly_path, "rb") as assembly_file_handler:
            file_data = assembly_file_handler.read()
        # NOTE(review): the second slice keeps up to txt_end bytes (not
        # SizeOfRawData), so the searched region may extend past the end of
        # .text. Kept as-is because narrowing it could reduce detection.
        text_section = file_data[txt_start:][:txt_end]
        del file_data  # release the full file image early

        # The last field of this 12-byte record at offset 8 of .text is used
        # as the metadata RVA.
        metadata_rva = struct.unpack("<IHHI", text_section[8:][:12])[-1]
        mdo = pe.get_offset_from_rva(metadata_rva)
        if mdo < txt_start:
            offsets_to_test = [mdo]
        else:
            offsets_to_test = [mdo - txt_start]
        # Fall back to every signature hit, last occurrence first.
        signature_hits = [hit.start() for hit in re.finditer(metadata_signature, text_section)]
        offsets_to_test.extend(reversed(signature_hits))

        for i_offset in offsets_to_test:
            i = text_section[i_offset:]
            try:
                if not i.startswith(metadata_signature):
                    continue
                clr_version_length = struct.unpack("<I", i[12:16])[0]
                stream_count = struct.unpack(
                    "<H", i[clr_version_length + 18:clr_version_length + 20])[0]
                current_offset = clr_version_length + 20

                # Stream headers: offset (4 bytes), size (4 bytes) and a
                # NUL-terminated name read in 4-byte steps.
                heaps = {}
                for _ in xrange(stream_count):
                    offset, size = struct.unpack("<II", i[current_offset:current_offset + 8])
                    current_offset += 8
                    name = ""
                    while "\x00" not in name:
                        chunk = i[current_offset:current_offset + 4]
                        if not chunk:
                            # Ran off the end of the data without finding the
                            # terminator; without this check the loop would
                            # never finish on truncated input.
                            raise ValueError("unterminated stream name")
                        name += chunk
                        current_offset += 4
                    name = name.strip("\x00")
                    heaps[name] = i[offset:offset + size]

                try:
                    extracted_mvid = format_guid_from_hex(heaps["#GUID"][:16].encode("hex"))
                except KeyError:
                    return {}

                # A missing "#~" stream raises KeyError and moves on to the
                # next candidate offset.
                tilde = heaps["#~"]

                # Byte 6 holds the heap size flags: a set bit selects 4-byte
                # indexes into the corresponding heap, otherwise 2-byte.
                heap_sizes = ord(tilde[6:7])
                strings_heap_index_length = 4 if heap_sizes & 0x01 else 2
                guid_heap_index_length = 4 if heap_sizes & 0x02 else 2
                blob_heap_index_length = 4 if heap_sizes & 0x04 else 2

                # Bytes 8-15 are a bitmask of present tables; the row counts
                # of the present tables follow from offset 24.
                valid_tables = struct.unpack("<Q", tilde[8:16])[0]
                row_counts = [0] * 64
                t_offset = 24
                for index in xrange(64):
                    if (valid_tables >> index) & 1:
                        row_counts[index] = struct.unpack("<I", tilde[t_offset:t_offset + 4])[0]
                        t_offset += 4

                has_custom_attribute_tables = [
                    0x06, 0x04, 0x01, 0x02, 0x08, 0x09, 0x0A, 0x00,
                    0x0E,  # Permission aka DeclSecurity (typo in the spec)
                    0x17, 0x14, 0x11, 0x1A, 0x1B, 0x20, 0x23, 0x26,
                    0x27, 0x2A, 0x2C, 0x2B
                ]
                custom_attribute_type_tables = [0x06, 0x0A]
                resolution_scope_tables = [0x00, 0x1A, 0x23, 0x01]
                type_def_or_ref_tables = [0x02, 0x01, 0x1B]
                member_ref_tables = [0x02, 0x01, 0x1A, 0x06, 0x1B]

                # A coded index grows from 2 to 4 bytes once any table it can
                # reference no longer fits beside the tag bits in 16 bits.
                big_has_custom_attribute = any(
                    row_counts[x] >= 2 ** (16 - 5) for x in has_custom_attribute_tables)
                big_custom_attribute_type = any(
                    row_counts[x] >= 2 ** (16 - 3) for x in custom_attribute_type_tables)
                big_resolution_scope = any(
                    row_counts[x] >= 2 ** (16 - 2) for x in resolution_scope_tables)
                big_type_def_or_ref = any(
                    row_counts[x] >= 2 ** (16 - 2) for x in type_def_or_ref_tables)
                big_member_ref_parent = any(
                    row_counts[x] >= 2 ** (16 - 3) for x in member_ref_tables)

                parent_width = 4 if big_has_custom_attribute else 2
                type_width = 4 if big_custom_attribute_type else 2
                index_formats = {2: "<H", 4: "<I"}

                # Build row length for each type up to CustomAttr
                row_type_widths = [
                    # 0x00 Module = Generation (2 bytes) + Name (String heap index) +
                    # Mvid (Guid heap index) + EncId (Guid heap index) +
                    # EncBaseId (Guid heap index)
                    2 + strings_heap_index_length + (guid_heap_index_length * 3),
                    # 0x01 TypeRef = ResolutionScope (ResolutionScope index) +
                    # TypeName (String heap) + TypeNamespace (String heap)
                    (4 if big_resolution_scope else 2) + (strings_heap_index_length * 2),
                    # 0x02 TypeDef = Flags + TypeName (String heap index) +
                    # TypeNamespace (String heap index) + Extends (TypeDefOrRef index) +
                    # FieldList (index into field table) +
                    # MethodList (index into MethodDef table)
                    8 + (4 if big_type_def_or_ref else 2) + (strings_heap_index_length * 2),
                    0,  # 0x03 None
                    # 0x04 Field = Flags (2 bytes) + Name (String heap index) +
                    # Signature (Blob heap index)
                    2 + strings_heap_index_length + blob_heap_index_length,
                    0,  # 0x05 None
                    # 0x06 MethodDef = RVA (4 bytes) + ImplFlags (2 bytes) + Flags (2 bytes) +
                    # Name (String heap index) + Signature (Blob heap index) +
                    # ParamList (index to param table)
                    10 + strings_heap_index_length + blob_heap_index_length,
                    0,  # 0x07 None
                    # 0x08 Param = Flags (2 bytes) + Sequence (2 bytes) + Name (String heap index)
                    4 + strings_heap_index_length,
                    # 0x09 InterfaceImpl = Class (TypeDef index) + Interface (TypeDefOrRef index)
                    2 + (4 if big_type_def_or_ref else 2),
                    # 0x0a MemberRef = Class (MemberRefParent) + Name (String heap index) +
                    # Signature (Blob heap index)
                    (4 if big_member_ref_parent else 2) + strings_heap_index_length +
                    blob_heap_index_length,
                    # 0x0b Constant = Type (?) + Parent + Value (Blob heap index)
                    4 + blob_heap_index_length,
                    # 0x0c CustomAttr = Parent + Type (CustomAttributeType) +
                    # Value (Blob heap index)
                    parent_width + type_width + blob_heap_index_length,
                    # Don't care about the rest
                ]

                # Skip every table that precedes CustomAttribute.
                for index in xrange(0x0c):
                    t_offset += row_type_widths[index] * row_counts[index]

                for _ in xrange(row_counts[0x0c]):
                    # In the most strict interpretation, a typelib id is expressed as a
                    # GuidAttribute on the current assembly.
                    # To check that it's actually a GuidAttribute we'd have to support parsing
                    # .NET signatures, so it's safer to assume a MemberRef attribute owned by a
                    # TypeRef on an AssemblyRow with a value matching a guid is PROBABLY the
                    # typelib id
                    row_offset = t_offset
                    t_offset += row_type_widths[0x0c]

                    parent_index = struct.unpack(
                        index_formats[parent_width],
                        tilde[row_offset:row_offset + parent_width])[0]
                    row_offset += parent_width
                    type_index = struct.unpack(
                        index_formats[type_width],
                        tilde[row_offset:row_offset + type_width])[0]
                    row_offset += type_width

                    parent_index_table = parent_index & 0x1f
                    type_index_table = type_index & 0x07

                    # We only really care if the parent is an Assembly and the attribute is
                    # constructed using a MemberRef. MemberRef because a MethodDef is never
                    # going to be used for a GuidAttribute. This is because GuidAttribute is
                    # from mscorlib, so always an external assembly, so always reached via
                    # TypeRef/MemberRef.
                    if parent_index_table != 0x0e or type_index_table != 0x03:
                        continue

                    blob_index = struct.unpack(
                        index_formats[blob_heap_index_length],
                        tilde[row_offset:row_offset + blob_heap_index_length])[0]
                    data_value = read_blob(heaps["#Blob"][blob_index:])
                    if guid_regex.match(data_value):
                        return {"mvid": extracted_mvid.lower(),
                                "typelib_id": data_value.lower(),
                                "compiled": compiled}

                return {"mvid": extracted_mvid.lower(), "compiled": compiled}
            except Exception:
                # Best effort: this candidate offset did not hold parseable
                # metadata, so try the next one. KeyboardInterrupt and
                # SystemExit are not Exception subclasses and still propagate.
                continue
    except Exception:
        # Any other failure (unreadable file, bad RVA, ...) means no result.
        return {}
    return {}
if __name__ == "__main__":
    # Command-line entry point: scan the given files/directories and report
    # typelib id, MVID, SHA-256, compile time and path for every assembly
    # that yields an MVID, either tab-separated on stdout or into out.csv.
    from argparse import ArgumentParser
    from glob import iglob
    from os.path import isfile, isdir, join, abspath

    version = "1.4.2"
    banner = "%(prog)s v" + version + " by Brian Wallace (@botnet_hunter)"
    parser = ArgumentParser(
        prog=__file__,
        description="Extracts Typelib IDs and MVIDs from .NET assemblies.",
        epilog=banner
    )
    # Explicit version action instead of the deprecated ArgumentParser
    # "version" keyword; still offers -v/--version.
    parser.add_argument('-v', '--version', action='version', version=banner)
    parser.add_argument('path', metavar='path', type=str, nargs='*', default=[],
                        help="Paths to files or directories to scan")
    parser.add_argument('-r', '--recursive', default=False, required=False, action='store_true',
                        help="Scan paths recursively")
    parser.add_argument('-c', '--csv', default=False, required=False, action='store_true',
                        help="Save to CSV")

    args = parser.parse_args()

    if not args.path:
        # No "stdin" option is defined on the parser, so the former
        # args.stdin lookup raised AttributeError; just show the help.
        parser.print_help()
        parser.exit()

    def scan_paths(paths, recursive):
        """Yield (file_path, get_assembly_guids(file_path)) for each file.

        Files are scanned directly. For a directory, every entry matched by
        "*" is visited: files are scanned and, when ``recursive`` is set,
        sub-directories are queued behind the paths still pending.
        """
        pending = list(paths)
        while pending:
            current = abspath(pending.pop(0))
            if isfile(current):
                yield current, get_assembly_guids(current)
            elif isdir(current):
                # iglob already yields paths that include the directory.
                for entry in iglob(join(current, "*")):
                    if isdir(entry) and recursive:
                        pending.append(entry)
                    if isfile(entry):
                        yield entry, get_assembly_guids(entry)

    def get_compiletime(pe):
        """Return the PE header TimeDateStamp as a local datetime."""
        return datetime.datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp)

    columns = ('TYPELIB', 'MVID', 'HASH', 'COMPILED', 'PATH')
    theCSV = None
    writer = None
    if args.csv:
        # The Python 2 csv module needs the file opened in binary mode;
        # text mode produces blank rows on platforms that translate newlines.
        theCSV = open('out.csv', 'wb')
        writer = csv.writer(theCSV)
        writer.writerow(columns)
    else:
        print("{0}\t\t\t\t\t{1}\t\t\t\t\t{2}\t\t\t\t\t{3}\t\t{4}".format(*columns))

    try:
        for file_path, result in scan_paths(args.path, args.recursive):
            if not result:
                # None: not a PE / not .NET. {}: nothing could be parsed.
                continue
            mvid = result.get("mvid")
            if mvid is None:
                # Potentially should log these results as they should at
                # least have an MVID
                continue
            typelib_id = result.get("typelib_id", "None\t\t\t\t")
            compiled = result.get("compiled", "None\t\t\t\t\t")

            with open(file_path, 'rb') as f:
                s = hashlib.sha256(f.read()).hexdigest()

            if args.csv:
                writer.writerow((typelib_id, mvid, s, compiled, file_path))
            else:
                print("{0}\t{1}\t{2}\t{3}\t{4}".format(typelib_id, mvid, s, compiled, file_path))
    finally:
        # Close the CSV even if scanning or hashing fails part-way through.
        if theCSV is not None:
            theCSV.close()