Skip to content

Commit

Permalink
Merge pull request #2 from Cskorpion/full_pypylog
Browse files Browse the repository at this point in the history
full pypylog stacks
  • Loading branch information
Cskorpion authored May 6, 2024
2 parents f711fa5 + f9fe19e commit 09809de
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Pytest

on:
push:
branches: [ "main" ]
branches: [ "main", "full_pypylog" ]
pull_request:
branches: [ "main" ]

Expand Down
75 changes: 56 additions & 19 deletions vmprofconvert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
CATEGORY_GC = 7
CATEGORY_INTERPRETER = 8

PPL_TIME = 0
PPL_ACTION = 1
PPL_STARTING = 2
PPL_DEPTH = 3

def convert(path):
stats = vmprof.read_profile(path)
c = Converter()
Expand Down Expand Up @@ -45,9 +50,6 @@ def convert_stats_with_pypylog(vmprof_path, pypylog_path, times):
total_runtime_micros = (times[1] - times[0]) * 1000000
pypylog = cut_pypylog(pypylog, total_runtime_micros, stats.get_runtime_in_microseconds())
pypylog = rescale_pypylog(pypylog, stats.get_runtime_in_microseconds())
pypylog = filter_top_level_logs(pypylog)
#for tid in list(c.threads.keys()):
# c.create_pypylog_marker(pypylog, tid)
c.walk_pypylog(pypylog)
return c.dumps_vmprof(stats), c.create_path_dict()# json_profile, file path dict

Expand Down Expand Up @@ -95,20 +97,44 @@ def walk_pypylog(self, pypylog):
plthread.name = "PyPyLog"
plthread.tid = tid
if plthread and pypylog:
plthread.create_pypylog_marker(pypylog)
for i in range(int(len(pypylog)/2)):
log_start = pypylog[2 * i]
log_end = pypylog[2 * i + 1]
logname = log_start[1]
logtime_start = log_start[0]
logtime_end = log_end[0]
self.add_pypylog_sample(plthread, logname, logtime_start, logtime_end)
if i < ((len(pypylog)/2) - 2):
next_log = pypylog[2 * i + 2]
next_logtime_start = next_log[0]
if abs(logtime_end - next_logtime_start) > 2:
self.add_pypylog_interp_sample(plthread, logtime_end + 1, next_logtime_start - 1)

ppl_stack = []
last_log = None
last_close_time = -1
mdiff = pypylog[1][PPL_TIME] - pypylog[0][PPL_TIME]# minimal time interval to squeeze an interpreter sample in between
for i in range(len(pypylog)):
log = pypylog[i]
if log[PPL_STARTING]:
ppl_stack.append(log)
else:
ppl_stack.append(log)
if last_log[PPL_ACTION] == log[PPL_ACTION]: # Only top level actions wanted e.g. A[ B[ B]A] => Sample: A->B not A->B, A
if len(plthread.samples) >= 2 and ppl_stack[-2][PPL_TIME] - last_close_time >= mdiff:# dont add interp sample at start
self.add_pypylog_interp_sample(plthread, last_close_time + 1, ppl_stack[-2][PPL_TIME] - 1)
plthread.create_single_pypylog_interpreter_marker(last_close_time + 1, ppl_stack[-2][PPL_TIME] - 1)
self.add_pypylog_sample_from_stack(plthread, ppl_stack)
plthread.create_single_pypylog_marker(ppl_stack[-2], ppl_stack[-1])
#assert log[PPL_ACTION] == ppl_stack[-1][PPL_ACTION] ### TODO: Remove later
last_close_time = log[PPL_TIME]
ppl_stack.pop()
ppl_stack.pop()
last_log = log

def add_pypylog_sample_from_stack(self, thread, stack_list):
#assert stack_list[-2][PPL_ACTION] == stack_list[-1][PPL_ACTION] ### TODO: Remove later
frames = []
categories = []
for log in stack_list[:-1]:# last frame is double (open + closed)
if "gc" in log[PPL_ACTION]:
categories.append(CATEGORY_GC)
elif "jit" in log[PPL_ACTION]:
categories.append(CATEGORY_JIT)
frames.append(thread.add_frame(log[PPL_ACTION], -1, "", categories[-1], -1, -1))
stackindex = thread.add_stack(frames, categories)
start_time = stack_list[-2][PPL_TIME]
end_time = stack_list[-1][PPL_TIME]
thread.add_sample(stackindex, start_time)
thread.add_sample(stackindex, end_time)

def add_pypylog_sample(self, thread, logname, logtime_start, logtime_end):
if "gc" in logname:
category = CATEGORY_GC
Expand All @@ -119,11 +145,11 @@ def add_pypylog_sample(self, thread, logname, logtime_start, logtime_end):
thread.add_sample(stackindex, logtime_start)
thread.add_sample(stackindex, logtime_end)

def add_pypylog_interp_sample(self, thread, logtime_end, next_logtime_start):
def add_pypylog_interp_sample(self, thread, logtime_start, logtime_end):
frameindex = thread.add_frame("interp", -1, "", CATEGORY_INTERPRETER, -1, -1)
stackindex = thread.add_stack([frameindex], [CATEGORY_INTERPRETER])
thread.add_sample(stackindex, logtime_start)
thread.add_sample(stackindex, logtime_end)
thread.add_sample(stackindex, next_logtime_start)


def walk_samples(self, stats):
Expand Down Expand Up @@ -459,6 +485,17 @@ def create_pypylog_marker(self, pypylog):
next_logtime_start = next_log[0]
if abs(endtime - next_logtime_start) > 2:
self.add_marker(endtime + 1, next_logtime_start - 1, interperter_string_id)

def create_single_pypylog_marker(self, start_log, stop_log):
starttime = start_log[PPL_TIME]
endtime = stop_log[PPL_TIME]
name = start_log[PPL_ACTION]
st_id = self.add_string(name)
self.add_marker(starttime, endtime, st_id)

def create_single_pypylog_interpreter_marker(self, starttime, endtime):
st_id = self.add_string("interpreter")## move out
self.add_marker(starttime, endtime, st_id)

def add_marker(self, starttime, endtime, stringtable_index):
self.markers.append([starttime, endtime, stringtable_index])
Expand Down
129 changes: 119 additions & 10 deletions vmprofconvert/test/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from vmprofconvert import convert_stats
from vmprofconvert import Converter
from vmprofconvert import Thread
from vmprofconvert import CATEGORY_PYTHON, CATEGORY_NATIVE, CATEGORY_JIT, CATEGORY_ASM, CATEGORY_JIT_INLINED, CATEGORY_MIXED, CATEGORY_INTERPRETER
from vmprofconvert import CATEGORY_PYTHON, CATEGORY_NATIVE, CATEGORY_JIT, CATEGORY_ASM, CATEGORY_JIT_INLINED, CATEGORY_MIXED, CATEGORY_INTERPRETER, CATEGORY_GC
from vmprofconvert.pypylog import parse_pypylog, cut_pypylog, rescale_pypylog, filter_top_level_logs
from vmprofconvert.__main__ import write_file_dict, save_zip, load_zip_dict, extract_files

Expand Down Expand Up @@ -429,18 +429,38 @@ def test_filter_top_level_logs():
assert filtered_pypylog[6][1] == filtered_pypylog[7][1]# not nested action
assert filtered_pypylog[6][2] != filtered_pypylog[7][2]# action start => action end

def test_create_pypylog_marker():
def test_create_pypylog_marker(): # create_pypylog_marker is no longer in use
pypylog_path = os.path.join(os.path.dirname(__file__), "profiles/pystone.pypylog")
t = Thread()
initial_pypylog = parse_pypylog(pypylog_path)
rescaled_pypylog = rescale_pypylog(initial_pypylog[:1000], 10000000)
filtered_pypylog = filter_top_level_logs(rescaled_pypylog[:20])
t.create_pypylog_marker(filtered_pypylog)
t.create_pypylog_marker(filtered_pypylog)# marker do not use categorys, but strings
assert t.markers[0] == [0, 10, 1]#gc or jit
assert t.markers[1] == [11, 19, 0]#interp
assert t.markers[2] == [20, 30, 2]#gc or jit
assert t.markers[3] == [31, 39, 0]#interp

def test_walk_pypylog_marker(): # create_pypylog_marker is no longer in use
pypylog_path = os.path.join(os.path.dirname(__file__), "profiles/pystone.pypylog")
converter = Converter()
initial_pypylog = parse_pypylog(pypylog_path)
rescaled_pypylog = rescale_pypylog(initial_pypylog[:100], 1000000)
converter.walk_pypylog(rescaled_pypylog)
thread = converter.threads[7]

gc_set_nursery_size_id = thread.add_string("gc-set-nursery-size")# get string ids, since name is string not category
gc_hardware_id = thread.add_string("gc-hardware")
interpreter_id = thread.add_string("interpreter")

expected_interval = 1000000/100 * 0.001

assert thread.markers[0] == [0, expected_interval, gc_set_nursery_size_id] # first log
assert thread.markers[1] == [expected_interval + 1,2 * expected_interval -1, interpreter_id]# interpreter
assert thread.markers[2] == [2 * expected_interval, 3 * expected_interval, gc_hardware_id]# second log
assert thread.markers[3] == [3 * expected_interval + 1,4 * expected_interval -1, interpreter_id]# interpreter
assert thread.markers[4] == [4 * expected_interval, 5 * expected_interval, gc_hardware_id]# third log

def test_get_unused_tid():
c = Converter()
c.threads[2] = Thread()
Expand Down Expand Up @@ -479,10 +499,99 @@ def test_walk_pypylog():
[17, "gc_example_action_a", False, 0]
]
c.walk_pypylog(test_pypylog)
t = c.threads[7]
stringarray = t.stringarray
assert stringarray[0] == "interpreter"
assert stringarray[1] == "gc_example_action_a"
thread = c.threads[7]
stringarray = thread.stringarray
assert stringarray[0] == "gc_example_action_a"


def test_walk_pypylog_interpreter_samples():
c = Converter()
test_pypylog = [# this just represents the order in time time. Order in samples list differs: gc gc interp gc gc interp
[0, "gc_example_action_a", True, 0],
[5, "gc_example_action_a", False, 0],
# interpreter sample here
[10, "gc_example_action_a", True, 0],
[15, "gc_example_action_a", False, 0],
# no interpreter sample here ### maybe we dont need this case
[16, "gc_example_action_a", True, 0],
[17, "gc_example_action_a", False, 0],
# interpreter sample here
[25, "gc_example_action_a", True, 0],
[30, "gc_example_action_a", False, 0]
]
c.walk_pypylog(test_pypylog)
thread = c.threads[7]

samples = thread.samples
stacktable = thread.stacktable

sample_0 = samples[0]# should be gc_example_action_a
sample_2 = samples[2]# should be gc_example_action_a
sample_4 = samples[4]# should be interpreter
sample_6 = samples[6]# should be gc_example_action_a
sample_8 = samples[8]# should be gc_example_action_a
sample_10 = samples[10]# should be interpreter

stack_0 = stacktable[sample_0[0]]
stack_2 = stacktable[sample_2[0]]
stack_4 = stacktable[sample_4[0]]
stack_6 = stacktable[sample_6[0]]
stack_8 = stacktable[sample_8[0]]
stack_10 = stacktable[sample_10[0]]

assert stack_0[2] == CATEGORY_GC
assert stack_2[2] == CATEGORY_INTERPRETER
assert stack_4[2] == CATEGORY_GC
assert stack_6[2] == CATEGORY_GC
assert stack_8[2] == CATEGORY_INTERPRETER
assert stack_10[2] == CATEGORY_GC
assert len(samples) == 12

def test_walk_full_pypylog():
c = Converter()
test_pypylog = [
[0, "gc_example_action_a", True, 0],
[1, "gc_example_action_b", True, 1],
[2, "gc_example_action_c", True, 2],
[3, "gc_example_action_c", False, 2], # top => two samples created
[4, "gc_example_action_b", False, 1], # not top
[5, "gc_example_action_a", False, 0], # not top
[6, "gc_example_action_a", True, 0],
[7, "gc_example_action_a", False, 0] # top => two samples created
]
c.walk_pypylog(test_pypylog)
thread = c.threads[7]
samples = thread.samples
stacktable = thread.stacktable
frametable = thread.frametable
functable = thread.functable
stringarray = thread.stringarray

sample_0 = samples[0]
sample_2 = samples[2]
sample_4 = samples[4]

stack_0 = stacktable[sample_0[0]]
stack_2 = stacktable[sample_2[0]]
stack_4 = stacktable[sample_4[0]]

frame_0 = frametable[stack_0[0]]
frame_2 = frametable[stack_2[0]]
frame_4 = frametable[stack_4[0]]

func_0 = functable[frame_0[0]]
func_2 = functable[frame_2[0]]
func_4 = functable[frame_4[0]]

funcname_0 = stringarray[func_0[0]]# gc_example_action_c
funcname_2 = stringarray[func_2[0]]# interpreter
funcname_4 = stringarray[func_4[0]]# gc_example_action_a

assert funcname_0 == "gc_example_action_c" # action c is a top level action
assert funcname_2 == "interp" # interpreter (why did i call this 'interp'?)
assert funcname_4 == "gc_example_action_a" # action a is a top level action, but c should be at index 0

assert len(samples) == 6# there should be only four samples 2x action c, 2x action a and now 2x interpreter

def test_dumps_vmprof_without_pypylog():
vmprof_path = os.path.join(os.path.dirname(__file__), "profiles/vmprof_cpuburn.prof")
Expand All @@ -495,7 +604,7 @@ def test_dumps_vmprof_without_pypylog():
assert len(samples["stack"]) == 5551
assert markers["data"] == []

def test_dumps_vmprof_with_pypylog():
def test_dumps_vmprof_with_pypylog(): ### TODO: re-enable assert as soon as interpreter frames can be created again
vmprof_path = os.path.join(os.path.dirname(__file__), "profiles/vmprof_cpuburn.prof")
pypylog_path = os.path.join(os.path.dirname(__file__), "profiles/pystone.pypylog")
times = (0, 42.368387)
Expand All @@ -504,8 +613,8 @@ def test_dumps_vmprof_with_pypylog():
samples = profile["threads"][0]["samples"]
stringarray = profile["threads"][1]["stringArray"]
assert len(samples["stack"]) == 5551
assert stringarray[0] == "interpreter"
assert stringarray[1] == "gc-set-nursery-size"
#assert stringarray[0] == "interpreter"
assert stringarray[0] == "gc-set-nursery-size"

def test_write_file_dict():
file_dict = {
Expand Down

0 comments on commit 09809de

Please sign in to comment.