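# find_kallsyms.py
#
# Heuristically locates the kernel's compressed symbol tables
# (kallsyms_token_index, kallsyms_token_table, kallsyms_markers,
# kallsyms_names, kallsyms_num_syms and the address/offset array) in a raw
# dump of the .rodata section and recovers (address, symbol name) pairs.
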
from collections import namedtuple
import logging
import struct


def align_up(p, a):
    return p & ~(a - 1)


def align(p, a):
    return (p + (a - 1)) & ~(a - 1)
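
# Worked example (illustration only, not part of the original logic): for
# power-of-two alignments, align_up() actually rounds *down* to the previous
# boundary, while align() rounds up to the next one:
#   align_up(13, 8) == 13 & ~7       == 8
#   align(13, 8)    == (13 + 7) & ~7 == 16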


def try_parse_token_index(rodata, endianness, offset):
    index_fmt = endianness + "H"
    (index,) = struct.unpack(index_fmt, rodata[offset : offset + 2])
    assert index == 0, "The first token index must be 0"
    indices = [index]
    for _ in range(255):
        offset += 2
        (index,) = struct.unpack(index_fmt, rodata[offset : offset + 2])
        if index <= indices[-1]:
            return None  # Token indices must be monotonically increasing.
        indices.append(index)
    return indices


def find_token_indices(rodata, endianness):
    token_index_offset = 0
    while True:
        # kallsyms_token_index is an array of 256 monotonically increasing
        # shorts, the first of which is 0. It is located right after
        # kallsyms_token_table, which is a sequence of null-terminated strings.
        # Therefore, look for 1+2 consecutive zeroes.
        token_index_offset = rodata.find(b"\x00\x00\x00", token_index_offset) + 1
        if token_index_offset == 0 or token_index_offset + 512 > len(rodata):
            break
        token_index = try_parse_token_index(rodata, endianness, token_index_offset)
        if token_index is not None:
            yield token_index_offset, token_index


def try_parse_token_table(rodata, token_index, start_offset, end_offset):
    tokens = []
    for i in range(256):
        token_start_offset = start_offset + token_index[i]
        if i == 255:
            # The last token ends at the end of the table.
            token_end_offset = end_offset
        else:
            # Other tokens end at the start of the next respective token.
            token_end_offset = start_offset + token_index[i + 1]
        token = rodata[token_start_offset:token_end_offset]
        if b"\x00" in token[:-1]:
            return None  # Tokens must be printable.
        if token[-1:] != b"\x00":
            return None  # Tokens must be null-terminated.
        if token[:-1] in tokens:
            return None  # Tokens must not repeat.
        tokens.append(token[:-1])
    return tokens
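
# Illustrative sketch of the two structures parsed above (hypothetical bytes,
# not taken from a real kernel): with kallsyms_token_index starting with
# [0, 3, 6, ...] and kallsyms_token_table starting with b"re\x00in\x00t\x00",
# the decoded tokens are b"re", b"in", b"t", ... - each index is the byte
# offset of the corresponding null-terminated token within the table.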


def find_token_tables(rodata, token_index, token_index_offset):
    # kallsyms_token_table is a sequence of 256 null-terminated strings.
    # There may be some padding at the end; skip it.
    token_table_end_offset = token_index_offset
    while rodata[token_table_end_offset - 2 : token_table_end_offset - 1] == b"\x00":
        token_table_end_offset -= 1
    # Find the last token by looking for a trailing \0.
    last_token_offset = rodata.rfind(b"\x00", 0, token_table_end_offset - 1) + 1
    if last_token_offset == 0:
        return
    # The last kallsyms_token_index element corresponds to the last token.
    # Use that information to locate kallsyms_token_table.
    token_table_offset = last_token_offset - token_index[-1]
    if token_table_offset < 0:
        return
    token_table = try_parse_token_table(
        rodata, token_index, token_table_offset, token_table_end_offset
    )
    if token_table is not None:
        yield token_table_offset, token_table


def find_markers(rodata, marker_fmt, marker_size, marker_offset):
    first = True
    markers = []
    while True:
        # kallsyms_markers is an array of monotonically increasing offsets,
        # which starts with 0. It is aligned on an 8-byte boundary, so if the
        # element size is 4 bytes and their number is odd, it is zero-padded at
        # the end.
        (marker,) = struct.unpack(
            marker_fmt, rodata[marker_offset : marker_offset + marker_size]
        )
        if first:
            first = False
            if marker == 0 and marker_size == 4:
                # Skip padding.
                marker_offset -= marker_size
                continue
        elif len(markers) > 0 and marker >= markers[-1]:
            # The array is not monotonically increasing.
            return
        markers.append(marker)
        if marker == 0:
            # We found the first element.
            break
        marker_offset -= marker_size
    if marker_size == 4 and len(markers) == 2:
        # Marker size must be 8 bytes, and we must be taking the upper
        # part, which is always 0, for the first marker.
        return
    markers.reverse()
    yield marker_offset, markers
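
# For orientation (a summary of the kernel layout assumed by find_markers()
# above): kallsyms_markers records one offset into kallsyms_names for every
# block of 256 symbols, so a successfully parsed array is strictly increasing
# and starts with 0, e.g. something like [0, 0x1f3a, 0x3e81, ...].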


def is_name_ok(rodata, token_lengths, offset):
    n_tokens = ord(rodata[offset : offset + 1])
    if n_tokens == 0 or n_tokens >= 128:
        # Tokens are at least one byte long. Names must not be empty, and they
        # must be at most 127 characters long.
        return False
    offset += 1
    name_length = 0
    for _ in range(n_tokens):
        # The caller is expected to have verified that the name entry does not
        # span past the end of kallsyms_names, so just fetch the next token.
        name_length += token_lengths[ord(rodata[offset : offset + 1])]
        if name_length >= 128:
            # Name is longer than 127 characters.
            return False
        offset += 1
    return True


def extract_name(rodata, token_table, offset):
    # Name must have already been checked, just expand tokens.
    n_tokens = ord(rodata[offset : offset + 1])
    name = b""
    for _ in range(n_tokens):
        offset += 1
        name += token_table[ord(rodata[offset : offset + 1])]
    return name
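
# Worked example of the name encoding handled by is_name_ok()/extract_name()
# (hypothetical values): an entry b"\x03" + bytes([7, 42, 9]) means "3 token
# indices follow"; with token_table[7] == b"init", token_table[42] == b"_ta"
# and token_table[9] == b"sk", the entry expands to b"init_task".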


def find_num_syms(rodata, endianness, token_table, markers_offset):
    # kallsyms_names is a sequence of length-prefixed entries ending with
    # padding to an 8-byte boundary, followed by kallsyms_markers.
    # Unfortunately, some guesswork is required to locate the start of
    # kallsyms_names given that we know the start of kallsyms_markers.
    num_syms_fmt = endianness + "I"
    token_lengths = [len(token) for token in token_table]
    # Indexed by (markers_offset - offset). Each element is the number of name
    # entries that follow the respective offset, or None if that offset is not
    # a start of a valid name entry.
    name_counts = [0]
    # Whether offset still points to one of the trailing zeroes.
    trailing_zeroes = True
    offset = markers_offset
    while offset >= 9:
        offset -= 1
        current_byte = ord(rodata[offset : offset + 1])
        if current_byte != 0:
            # Trailing zeroes have ended.
            trailing_zeroes = False
        next_name_offset = offset + current_byte + 1
        if next_name_offset > markers_offset:
            # The current name entry spans past the end of kallsyms_names. This
            # is allowed if we are still looking at trailing zeroes.
            name_counts.append(0 if trailing_zeroes else None)
            continue
        next_name_count = name_counts[markers_offset - next_name_offset]
        if next_name_count is None:
            # The next name entry is invalid, which means the current name
            # entry cannot be valid.
            name_counts.append(None)
            continue
        if is_name_ok(rodata, token_lengths, offset):
            # The current name entry is valid. Check whether it is preceded by
            # a kallsyms_num_syms value that is consistent with the number of
            # name entries we've seen so far.
            name_counts.append(next_name_count + 1)
            num_syms_offset = None
            # How kallsyms_num_syms is aligned depends on a particular kernel,
            # so try different offsets.
            for i in (-4, -8, -12, -16):
                (num_syms,) = struct.unpack(
                    num_syms_fmt, rodata[offset + i : offset + i + 4]
                )
                if name_counts[-1] == num_syms:
                    num_syms_offset = offset + i
                    break
                if num_syms != 0:
                    break
            if num_syms_offset is not None:
                break
        else:
            # The current name entry is not valid. This is allowed if we are
            # still looking at trailing zeroes.
            name_counts.append(0 if trailing_zeroes else None)
    else:
        return
    # We've found kallsyms_names, now parse it.
    names = []
    for _ in range(name_counts[-1]):
        names.append(extract_name(rodata, token_table, offset).decode())
        offset += ord(rodata[offset : offset + 1]) + 1
    yield num_syms_offset, names


Word = namedtuple("Word", ("size", "fmt", "ctype"))
WORD32 = Word(4, "I", "u32")
WORD64 = Word(8, "Q", "u64")
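
# The Word namedtuple above describes how wide the on-disk quantities are:
# WORD32 is typically seen with 32-bit kernels and WORD64 with 64-bit ones.
# "fmt" is the struct format character and "ctype" is only used for logging.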


def find_addresses_no_kallsyms_base_relative(
    rodata, endianness, addresses_offset, num_syms, word
):
    if addresses_offset + num_syms * word.size > len(rodata):
        return
    address_fmt = endianness + word.fmt
    if word.size == 8 and addresses_offset % 8 != 0:
        addresses_offset -= 4
    offset = addresses_offset
    addresses = []
    for _ in range(num_syms):
        (address,) = struct.unpack(address_fmt, rodata[offset : offset + word.size])
        if len(addresses) > 0 and address < addresses[-1]:
            # The resulting addresses are not sorted.
            return
        addresses.append(address)
        offset += word.size
    logging.debug(
        "0x%08X: %s kallsyms_addresses[]",
        addresses_offset,
        word.ctype,
    )
    yield addresses_offset, offset, addresses


def find_addresses_kallsyms_base_relative(
    rodata, endianness, addresses_offset, num_syms, word
):
    if addresses_offset < 0:
        return
    addresses_end = addresses_offset + num_syms * 4
    relative_base_offset = align(addresses_end, word.size)
    relative_base_end = relative_base_offset + word.size
    if relative_base_end > len(rodata):
        return
    raw_addresses = struct.unpack(
        endianness + "i" * num_syms, rodata[addresses_offset:addresses_end]
    )
    (kallsyms_relative_base,) = struct.unpack(
        endianness + word.fmt,
        rodata[relative_base_offset:relative_base_end],
    )

    def log_ok():
        logging.debug(
            "0x%08X: %s kallsyms_relative_base=0x%016X",
            relative_base_offset,
            word.ctype,
            kallsyms_relative_base,
        )
        logging.debug("0x%08X: u32 kallsyms_offsets[]", addresses_offset)

    # Try !KALLSYMS_ABSOLUTE_PERCPU first.
    # A lot of small nonnegative numbers will match KALLSYMS_ABSOLUTE_PERCPU
    # too, but it's more likely to be !KALLSYMS_ABSOLUTE_PERCPU.
    addresses = []
    for raw in raw_addresses:
        address = kallsyms_relative_base + (raw & 0xFFFFFFFF)
        if len(addresses) > 0 and address < addresses[-1]:
            # The resulting addresses are not sorted.
            break
        addresses.append(address)
    else:
        log_ok()
        yield addresses_offset, addresses_end, addresses
    # Try KALLSYMS_ABSOLUTE_PERCPU.
    addresses = []
    for raw in raw_addresses:
        if raw >= 0:
            address = raw
        else:
            address = kallsyms_relative_base - 1 - raw
        if len(addresses) > 0 and address < addresses[-1]:
            # The resulting addresses are not sorted.
            break
        addresses.append(address)
    else:
        log_ok()
        yield addresses_offset, relative_base_end, addresses
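
# Worked example of the two interpretations tried above (hypothetical
# numbers): with kallsyms_relative_base == 0xFFFFFFFF80000000 and a raw
# offset of -0x11 (0xFFFFFFEF when read as u32),
#   !KALLSYMS_ABSOLUTE_PERCPU: address = base + 0xFFFFFFEF
#   KALLSYMS_ABSOLUTE_PERCPU:  address = base - 1 - (-0x11) == base + 0x10
# whereas a nonnegative raw offset such as 0x1000 is taken verbatim as an
# absolute address in the KALLSYMS_ABSOLUTE_PERCPU case.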


def find_kallsyms_in_rodata(rodata):
    for addresses, names in (
        (addresses, names)
        for endianness in ("<", ">")
        for _ in (logging.debug("Endianness: %s", endianness),)
        for token_index_offset, token_index in find_token_indices(rodata, endianness)
        for _ in (
            logging.debug(
                "0x%08X: kallsyms_token_index=%s", token_index_offset, token_index
            ),
        )
        for token_table_offset, token_table in find_token_tables(
            rodata, token_index, token_index_offset
        )
        for _ in (
            logging.debug(
                "0x%08X: kallsyms_token_table=%s", token_table_offset, token_table
            ),
        )
        # In 6.2 (commits 60443c88f3a8 and 19bd8981dc2e) kallsyms_seqs_of_names
        # was added between kallsyms_markers and kallsyms_token_table.
        for markers_end_offset in range(token_table_offset, -4, -4)
        # In 4.20 the size of markers was reduced to 4 bytes.
        for marker_fmt, marker_size in (
            (endianness + "I", 4),
            (endianness + "Q", 8),
        )
        for markers_offset, markers in find_markers(
            rodata, marker_fmt, marker_size, markers_end_offset - marker_size
        )
        for _ in (
            logging.debug("0x%08X: kallsyms_markers=%s", markers_offset, markers),
        )
        for num_syms_offset, names in find_num_syms(
            rodata, endianness, token_table, markers_offset
        )
        for _ in (
            logging.debug("0x%08X: kallsyms_num_syms=%s", num_syms_offset, len(names)),
        )
        for word in (WORD64, WORD32)
        for _ in (logging.debug("WORD%d", word.size),)
        # KALLSYMS_BASE_RELATIVE means that kallsyms_offsets are followed by
        # kallsyms_relative_base. This was introduced in 4.6 by commit
        # 2213e9a66bb8.
        for base_relative in (False, True)
        for _ in (
            logging.debug("KALLSYMS_BASE_RELATIVE=%s", "y" if base_relative else "n"),
        )
        # Since kernel 6.4 (commit 404bad70fcf7) kallsyms_addresses come after
        # kallsyms_token_index.
        for addresses_first in (True, False)
        for _ in (logging.debug("addresses_first=%s", addresses_first),)
        for addresses_offset, addresses_end, addresses in (
            find_addresses_kallsyms_base_relative(
                rodata,
                endianness,
                align_up(
                    align_up(num_syms_offset, word.size) - word.size - len(names) * 4,
                    word.size,
                )
                if addresses_first
                else align(token_index_offset + 512, word.size),
                len(names),
                word,
            )
            if base_relative
            else find_addresses_no_kallsyms_base_relative(
                rodata,
                endianness,
                num_syms_offset - len(names) * word.size
                if addresses_first
                else token_index_offset + 512,
                len(names),
                word,
            )
        )
        for _ in (
            logging.debug(
                "0x%08X: kallsyms[0x%08X]",
                addresses_offset if addresses_first else num_syms_offset,
                token_index_offset + 512 - addresses_offset
                if addresses_first
                else (
                    align(addresses_end, word.size) + len(names) * 3 - num_syms_offset
                ),
            ),
        )
    ):
        return zip(addresses, names)
    return []
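
# Minimal usage sketch; this __main__ block is an addition for illustration,
# not part of the original module, and assumes the argument is a raw dump of
# the kernel's .rodata section.
if __name__ == "__main__":
    import sys

    # Pass -v to see the debug trace of the individual discovery steps.
    logging.basicConfig(level=logging.DEBUG if "-v" in sys.argv else logging.INFO)
    args = [arg for arg in sys.argv[1:] if arg != "-v"]
    if len(args) != 1:
        sys.exit("usage: find_kallsyms.py [-v] RODATA_DUMP")
    with open(args[0], "rb") as fp:
        rodata = fp.read()
    for address, name in find_kallsyms_in_rodata(rodata):
        print("%016x %s" % (address, name))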