Skip to content

Commit

Permalink
chg: [spindumpnosymbols] snake case fixes #125
Browse files Browse the repository at this point in the history
  • Loading branch information
cvandeplas committed Dec 17, 2024
1 parent 489a7a3 commit 4a17c84
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 85 deletions.
29 changes: 15 additions & 14 deletions src/sysdiagnose/parsers/spindumpnosymbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import re
from sysdiagnose.utils.base import BaseParserInterface, logger
from datetime import datetime, timedelta, timezone
from sysdiagnose.utils.misc import snake_case


class SpindumpNoSymbolsParser(BaseParserInterface):
Expand Down Expand Up @@ -76,13 +77,13 @@ def parse_basic(data: list) -> dict:
for line in data:
splitted = line.split(":", 1)
if len(splitted) > 1:
output[splitted[0]] = splitted[1].strip()
output[snake_case(splitted[0])] = splitted[1].strip()

if 'Date/Time' in output:
if 'date_time' in output:
try:
timestamp = datetime.strptime(output['Date/Time'], "%Y-%m-%d %H:%M:%S.%f %z")
timestamp = datetime.strptime(output['date_time'], "%Y-%m-%d %H:%M:%S.%f %z")
except ValueError:
timestamp = datetime.strptime(output['Date/Time'], "%Y-%m-%d %H:%M:%S %z")
timestamp = datetime.strptime(output['date_time'], "%Y-%m-%d %H:%M:%S %z")
output['timestamp'] = timestamp.timestamp()
output['datetime'] = timestamp.isoformat(timespec='microseconds')

Expand All @@ -99,7 +100,7 @@ def parse_processes(data: list, start_timestamp: int) -> list[dict]:
if not init:
process = SpindumpNoSymbolsParser.parse_process(process_buffer)
try:
timestamp = start_time - timedelta(seconds=int(process['Time Since Fork'].rstrip('s')))
timestamp = start_time - timedelta(seconds=int(process['time_since_fork'].rstrip('s')))
except KeyError: # some don't have a time since fork, like zombie processes
timestamp = start_time
process['timestamp'] = timestamp.timestamp()
Expand All @@ -113,7 +114,7 @@ def parse_processes(data: list, start_timestamp: int) -> list[dict]:
process_buffer.append(line.strip())

process = SpindumpNoSymbolsParser.parse_process(process_buffer)
timestamp = start_time - timedelta(seconds=int(process['Time Since Fork'].rstrip('s')))
timestamp = start_time - timedelta(seconds=int(process['time_since_fork'].rstrip('s')))
process['timestamp'] = timestamp.timestamp()
process['datetime'] = timestamp.isoformat(timespec='microseconds')
processes.append(process)
Expand Down Expand Up @@ -146,14 +147,14 @@ def parse_process(data):
process['threads'] = SpindumpNoSymbolsParser.parse_threads(threads)
process['images'] = SpindumpNoSymbolsParser.parse_images(images)
# parse special substrings
process['PID'] = int(re.search(r'\[(\d+)\]', process['Process']).group(1))
process['Process'] = process['Process'].split("[", 1)[0].strip()
process['pid'] = int(re.search(r'\[(\d+)\]', process['process']).group(1))
process['process'] = process['process'].split("[", 1)[0].strip()
try:
process['PPID'] = int(re.search(r'\[(\d+)\]', process['Parent']).group(1))
process['Parent'] = process['Parent'].split("[", 1)[0].strip()
process['ppid'] = int(re.search(r'\[(\d+)\]', process['parent']).group(1))
process['parent'] = process['parent'].split("[", 1)[0].strip()
except KeyError: # some don't have a parent
pass
process['UID'] = 501
process['uid'] = 501
return process

def parse_threads(data):
Expand Down Expand Up @@ -183,10 +184,10 @@ def parse_thread(data):
# Thread Name / DispatchQueue
if "DispatchQueue \"" in data[0]:
dispacthregex = re.search(r"DispatchQueue(.*)\"\(", data[0])
output['DispatchQueue'] = dispacthregex.group(0).split("\"")[1]
output['dispatch_queue'] = dispacthregex.group(0).split("\"")[1]
if "Thread name \"" in data[0]:
dispacthregex = re.search(r"Thread name\ \"(.*)\"", data[0])
output['ThreadName'] = dispacthregex.group(0).split("\"")[1]
output['thread_name'] = dispacthregex.group(0).split("\"")[1]
# priority
if "priority" in data[0]:
priorityregex = re.search(r"priority\ [0-9]+", data[0])
Expand Down Expand Up @@ -220,7 +221,7 @@ def parse_images(data):
image['start'] = clean[0]
image['end'] = clean[2]
image['image'] = clean[3]
image['UUID'] = clean[4][1:-1]
image['uuid'] = clean[4][1:-1]
try:
image['path'] = clean[5]
except: # noqa E722
Expand Down
45 changes: 4 additions & 41 deletions src/sysdiagnose/utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import json
import nska_deserialize
import os
import heapq
import re


def merge_dicts(a: dict, b: dict) -> dict:
Expand Down Expand Up @@ -142,43 +142,6 @@ def find_bytes(d):
return d


def sort_large_file(input_file, output_file, chunk_size=100000):
    """Sort a JSON-lines file by each record's "timestamp" key (external merge sort).

    The input is read line by line, split into chunks of at most
    ``chunk_size`` records, and each chunk is sorted in memory and written to
    a temporary ``temp_chunk_<n>.jsonl`` file (relative path, i.e. in the
    current working directory). The sorted chunks are then merged into
    ``output_file`` with ``heapq.merge``.

    Args:
        input_file: Path of the JSON-lines file to sort; every line must be a
            JSON object containing a "timestamp" key.
        output_file: Path of the sorted JSON-lines file to write.
        chunk_size: Maximum number of records held in memory per chunk.

    Raises:
        OSError: If the input, output or a temporary file cannot be opened.
        json.JSONDecodeError: If a line is not valid JSON.
        KeyError: If a record has no "timestamp" key.
    """
    temp_files = []
    # Bound before the try block so the cleanup in `finally` can always
    # iterate it; otherwise an early failure (e.g. missing input file) would
    # raise UnboundLocalError there and mask the real exception.
    open_files = []

    def write_sorted_chunk(records):
        # Sort one in-memory chunk and persist it as the next temporary file.
        temp_file = f"temp_chunk_{len(temp_files)}.jsonl"
        # Register the name first so cleanup also covers a partially written file.
        temp_files.append(temp_file)
        with open(temp_file, "w") as tmp:
            for record in sorted(records, key=lambda x: x["timestamp"]):
                tmp.write(json.dumps(record) + "\n")

    try:
        # Step 1: Split into sorted chunks
        with open(input_file, "r") as infile:
            chunk = []
            for line in infile:
                chunk.append(json.loads(line.strip()))

                # When chunk size is reached, sort and write to a temporary file
                if len(chunk) >= chunk_size:
                    write_sorted_chunk(chunk)
                    chunk = []

            # Sort and write any remaining records
            if chunk:
                write_sorted_chunk(chunk)

        # Step 2: Merge sorted chunks
        with open(output_file, "w") as outfile:
            # Append one handle at a time so already-opened files are still
            # closed if a later open() fails.
            for temp_file in temp_files:
                open_files.append(open(temp_file, "r"))
            iterators = (map(json.loads, f) for f in open_files)
            for record in heapq.merge(*iterators, key=lambda x: x["timestamp"]):
                outfile.write(json.dumps(record) + "\n")
    finally:
        # Close all temporary files, then delete them
        for f in open_files:
            f.close()
        for temp_file in temp_files:
            # A registered chunk may not exist if its open() failed.
            if os.path.exists(temp_file):
                os.remove(temp_file)
def snake_case(s):
    """Return *s* lowercased, with every character that is not an ASCII
    letter or digit replaced by an underscore.

    Each offending character maps to exactly one ``_``; runs of separators
    are not collapsed and leading/trailing ones are not trimmed.
    """
    non_alnum = re.compile(r'[^a-zA-Z0-9]')
    lowered = s.lower()
    return non_alnum.sub('_', lowered)
60 changes: 30 additions & 30 deletions tests/test_parsers_spindumpnosymbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_parsespindumpNS(self):

result = p.get_result()
self.assertGreater(len(result), 1)
self.assertTrue('OS Version' in result[0])
self.assertTrue('os_version' in result[0])

def test_parse_basic(self):
lines = [
Expand All @@ -30,11 +30,11 @@ def test_parse_basic(self):
expected_result = {
'timestamp': 1684960155.759,
'datetime': '2023-05-24T13:29:15.759000-07:00',
'Date/Time': '2023-05-24 13:29:15.759 -0700',
'End time': '2023-05-24 13:29:17.757 -0700',
'OS Version': 'iPhone OS 15.7.6 (Build 19H349)',
'Architecture': 'arm64',
'Report Version': '35.1'
'date_time': '2023-05-24 13:29:15.759 -0700',
'end_time': '2023-05-24 13:29:17.757 -0700',
'os_version': 'iPhone OS 15.7.6 (Build 19H349)',
'architecture': 'arm64',
'report_version': '35.1'
}
result = SpindumpNoSymbolsParser.parse_basic(lines)
self.maxDiff = None
Expand All @@ -47,7 +47,7 @@ def test_parse_basic_nomili(self):
expected_result = {
'timestamp': 1684960155.000,
'datetime': '2023-05-24T13:29:15.000000-07:00',
'Date/Time': '2023-05-24 13:29:15 -0700'
'date_time': '2023-05-24 13:29:15 -0700'
}
result = SpindumpNoSymbolsParser.parse_basic(lines)
self.maxDiff = None
Expand Down Expand Up @@ -86,32 +86,32 @@ def test_parse_process(self):

]
expected_result = {
'Process': 'accessoryd',
'PID': 176,
'PPID': 1,
'UUID': 'BDBDD550-2B15-382C-BB61-1798AFD60460',
'Path': '/System/Library/PrivateFrameworks/CoreAccessories.framework/Support/accessoryd',
'Shared Cache': '6D5223AF-7B75-3593-9CC4-5DBD74C56497 slid base address 0x180734000, slide 0x734000',
'Architecture': 'arm64',
'Parent': 'launchd',
'UID': 501,
'Sudden Term': 'Tracked (allows idle exit)',
'Footprint': '3792 KB',
'Time Since Fork': '201s',
'Num samples': '8 (1-8)',
'Note': '1 idle work queue thread omitted',
'process': 'accessoryd',
'pid': 176,
'ppid': 1,
'uuid': 'BDBDD550-2B15-382C-BB61-1798AFD60460',
'path': '/System/Library/PrivateFrameworks/CoreAccessories.framework/Support/accessoryd',
'shared_cache': '6D5223AF-7B75-3593-9CC4-5DBD74C56497 slid base address 0x180734000, slide 0x734000',
'architecture': 'arm64',
'parent': 'launchd',
'uid': 501,
'sudden_term': 'Tracked (allows idle exit)',
'footprint': '3792 KB',
'time_since_fork': '201s',
'num_samples': '8 (1-8)',
'note': '1 idle work queue thread omitted',
'threads': [
{
'thread': '0x8b', 'DispatchQueue': 'com.apple.main-thread', 'priority': '31',
'thread': '0x8b', 'dispatch_queue': 'com.apple.main-thread', 'priority': '31',
'loaded':
[{'library': 'dyld', 'int': '99536', 'hex': '0x102c504d0'}, {'library': 'accessoryd', 'int': '554572', 'hex': '0x10287b64c'}, {'library': 'Foundation', 'int': '99872', 'hex': '0x1821c1620'}, {'library': 'Foundation', 'int': '97964', 'hex': '0x1821c0eac'}, {'library': 'CoreFoundation', 'int': '123252', 'hex': '0x180ab3174'}, {'library': 'CoreFoundation', 'int': '44944', 'hex': '0x180a9ff90'}, {'library': 'CoreFoundation', 'int': '27784', 'hex': '0x180a9bc88'}, {'library': 'libsystem_kernel.dylib', 'int': '2732', 'hex': '0x1bb3f9aac'}, {'hex': '0xfffffff0071a86d4'}]
}],
'images': [
{'start': '0x1027f4000', 'end': '???', 'image': 'accessoryd', 'UUID': 'BDBDD550-2B15-382C-BB61-1798AFD60460', 'path': '/System/Library/PrivateFrameworks/CoreAccessories.framework/Support/accessoryd'},
{'start': '0x102c38000', 'end': '0x102ca3fff', 'image': 'dyld', 'UUID': '58AB16CE-D7E0-32D3-946D-4F68FB1A4A17', 'path': '/cores/dyld'},
{'start': '0x180a95000', 'end': '0x180ed2fff', 'image': 'CoreFoundation', 'UUID': '717D70C9-3B8E-3ABC-AE16-050588FC3EE8', 'path': '/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation'},
{'start': '0x1821a9000', 'end': '0x18248dfff', 'image': 'Foundation', 'UUID': 'C3A840E1-0D11-32A3-937F-7F668FFB13F0', 'path': '/System/Library/Frameworks/Foundation.framework/Foundation'},
{'start': '0x1bb3f9000', 'end': '0x1bb42cfff', 'image': 'libsystem_kernel.dylib', 'UUID': 'D3BAC787-09EE-3319-BE24-4115817391E2', 'path': '/usr/lib/system/libsystem_kernel.dylib'}
{'start': '0x1027f4000', 'end': '???', 'image': 'accessoryd', 'uuid': 'BDBDD550-2B15-382C-BB61-1798AFD60460', 'path': '/System/Library/PrivateFrameworks/CoreAccessories.framework/Support/accessoryd'},
{'start': '0x102c38000', 'end': '0x102ca3fff', 'image': 'dyld', 'uuid': '58AB16CE-D7E0-32D3-946D-4F68FB1A4A17', 'path': '/cores/dyld'},
{'start': '0x180a95000', 'end': '0x180ed2fff', 'image': 'CoreFoundation', 'uuid': '717D70C9-3B8E-3ABC-AE16-050588FC3EE8', 'path': '/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation'},
{'start': '0x1821a9000', 'end': '0x18248dfff', 'image': 'Foundation', 'uuid': 'C3A840E1-0D11-32A3-937F-7F668FFB13F0', 'path': '/System/Library/Frameworks/Foundation.framework/Foundation'},
{'start': '0x1bb3f9000', 'end': '0x1bb42cfff', 'image': 'libsystem_kernel.dylib', 'uuid': 'D3BAC787-09EE-3319-BE24-4115817391E2', 'path': '/usr/lib/system/libsystem_kernel.dylib'}
]
}
result = SpindumpNoSymbolsParser.parse_process(lines)
Expand All @@ -134,7 +134,7 @@ def test_parse_thread(self):
' *8 ??? [0xfffffff0071a86d4]'
]
expected_result = {
'thread': '0x8b', 'DispatchQueue': 'com.apple.main-thread', 'priority': '31',
'thread': '0x8b', 'dispatch_queue': 'com.apple.main-thread', 'priority': '31',
'loaded': [
{'library': 'dyld', 'int': '99536', 'hex': '0x102c504d0'},
{'library': 'accessoryd', 'int': '554572', 'hex': '0x10287b64c'},
Expand All @@ -155,7 +155,7 @@ def test_parse_thread(self):
' 8 ??? (apsd + 281160) [0x10205ca48]'
]
expected_result = {
'thread': '0x62', 'DispatchQueue': 'com.apple.main-thread', 'priority': '31', 'cputime': '0.005s (4.2M cycles, 1986.9K instructions, 2.14c/i)',
'thread': '0x62', 'dispatch_queue': 'com.apple.main-thread', 'priority': '31', 'cputime': '0.005s (4.2M cycles, 1986.9K instructions, 2.14c/i)',
'loaded': [
{'library': 'dyld', 'int': '99536', 'hex': '0x10236c4d0'},
{'library': 'apsd', 'int': '281160', 'hex': '0x10205ca48'}
Expand All @@ -169,7 +169,7 @@ def test_parse_thread(self):
'*1 ??? (kernel + 850132) [0xffffff80002df8d4] (running)'
]
expected_result = {
'thread': '0x84', 'ThreadName': "IOConfigThread_'foobar'", 'priority': '80', 'cputime': '<0.001s',
'thread': '0x84', 'thread_name': "IOConfigThread_'foobar'", 'priority': '80', 'cputime': '<0.001s',
'loaded': [
{'library': 'kernel', 'int': '850132', 'hex': '0xffffff80002df8d4', 'status': 'running'}
]
Expand Down

0 comments on commit 4a17c84

Please sign in to comment.