From 6413cfba10e07bfec4d893eecbf486859279ec0e Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Fri, 23 Aug 2024 14:14:38 +0200 Subject: [PATCH 01/13] small fixes (doe history, visualization) --- src/jumper/kernel.py | 13 +++++-- src/jumper/perfdatahandler.py | 5 +-- src/jumper/visualization.py | 70 ++++++++++++++++++----------------- 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/src/jumper/kernel.py b/src/jumper/kernel.py index 27243fe..81f3eb9 100644 --- a/src/jumper/kernel.py +++ b/src/jumper/kernel.py @@ -626,7 +626,8 @@ async def scorep_execute( # Transmit user persistence and updated sys.path from Jupyter # notebook to subprocess After running the code, transmit subprocess # persistence back to Jupyter notebook - with os.fdopen(os.open(scorep_script_name, os.O_WRONLY | os.O_CREAT), 'w') as file: + with os.fdopen(os.open(scorep_script_name, os.O_WRONLY | os.O_CREAT), + 'w') as file: file.write(self.pershelper.subprocess_wrapper(code)) # For disk mode use implicit synchronization between kernel and @@ -720,7 +721,7 @@ async def scorep_execute( self.cell_output(line) performance_data_nodes, duration = ( - self.perfdata_handler.end_perfmonitor(code) + self.perfdata_handler.end_perfmonitor() ) # In disk mode, subprocess already terminated @@ -810,6 +811,7 @@ async def scorep_execute( self.pershelper.postprocess() if performance_data_nodes: self.report_perfdata(performance_data_nodes, duration) + self.perfdata_handler.append_code(datetime.datetime.now(), code) return self.standard_reply() async def do_execute( @@ -921,7 +923,8 @@ async def do_execute( pd.DataFrame( self.perfdata_handler.get_code_history(), columns=["timestamp", "code"], - ).reset_index() + ).reset_index(), layout={"topStart": "search", "topEnd": None}, + columnDefs=[{"className": 'dt-left', "targets": 2}], ) return self.standard_reply() elif code.startswith("%%perfdata_to_variable"): @@ -1060,10 +1063,12 @@ async def do_execute( cell_id=cell_id, ) performance_data_nodes, duration 
= ( - self.perfdata_handler.end_perfmonitor(code) + self.perfdata_handler.end_perfmonitor() ) if performance_data_nodes: self.report_perfdata(performance_data_nodes, duration) + self.perfdata_handler.append_code(datetime.datetime.now(), + code) return parent_ret elif self.mode == KernelMode.MULTICELL: return self.append_multicellmode(magics_cleanup(code)) diff --git a/src/jumper/perfdatahandler.py b/src/jumper/perfdatahandler.py index 3e8079c..da43da8 100644 --- a/src/jumper/perfdatahandler.py +++ b/src/jumper/perfdatahandler.py @@ -3,7 +3,6 @@ import pickle import codecs import time -from datetime import datetime import os import subprocess import sys @@ -326,7 +325,7 @@ def start_perfmonitor(self, pid): self.starttime = time.perf_counter() - def end_perfmonitor(self, code): + def end_perfmonitor(self): duration = time.perf_counter() - self.starttime if self.monitor_module: @@ -352,6 +351,4 @@ def end_perfmonitor(self, code): performance_data_nodes = self.parse_perfdata_from_stdout( stdout_data_node ) - if performance_data_nodes: - self.append_code(datetime.now(), code) return performance_data_nodes, duration diff --git a/src/jumper/visualization.py b/src/jumper/visualization.py index 7071621..efe4669 100644 --- a/src/jumper/visualization.py +++ b/src/jumper/visualization.py @@ -32,7 +32,7 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None): int(os.environ.get("JUMPER_REPORT_FREQUENCY", 2)), ) ] - print(time_indices) + if metric == perfmetrics["cpu_agg"]: ax.plot( x_scale, perfdata[0][0][-3], label="Mean", color=(0.20, 0.47, 1.00) @@ -148,50 +148,52 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None): ax.legend() ax.grid(True) - # in multi node case, we have to iterate over the indices (time_indices) - # and not only 0 here - current_index = 0 - target_index = -1 - transition_offset = (x_scale[1] - x_scale[0]) / 2 - start_offset = 0 - last_idx = time_indices[0][-1][0] - - for cell_idx, n_ms in time_indices[0]: + # colorization 
of the plot in case of multiple cells + if time_indices: + # in multi node case, we have to iterate over the indices (time_indices) + # and not only 0 here + current_index = 0 + target_index = -1 + transition_offset = (x_scale[1] - x_scale[0]) / 2 + start_offset = 0 + last_idx = time_indices[0][-1][0] - target_index = target_index + n_ms - # don't use offset for last cell - if cell_idx == last_idx: - transition_offset = 0 - ax.axvspan(x_scale[current_index] + start_offset, - x_scale[target_index] + - transition_offset, - facecolor=color[cell_idx], alpha=0.3) + for cell_idx, n_ms in time_indices[0]: - text_x_pos = x_scale[current_index] + start_offset + ( - (x_scale[target_index] + transition_offset - - x_scale[current_index] + start_offset) / 2) - text_y_pos = ax.get_ylim()[0] + (ax.get_ylim()[1]*0.05) + target_index = target_index + n_ms + # don't use offset for last cell + if cell_idx == last_idx: + transition_offset = 0 + ax.axvspan(x_scale[current_index] + start_offset, + x_scale[target_index] + + transition_offset, + facecolor=color[cell_idx], alpha=0.3) - # add cell index to plot - ax.text(text_x_pos, text_y_pos, "#" + str(cell_idx), style='italic', - bbox={ - 'facecolor': 'lightgrey', 'alpha': 0.5, 'pad': 2} - ) - - current_index = target_index - start_offset = transition_offset + text_x_pos = x_scale[current_index] + start_offset + ( + (x_scale[target_index] + transition_offset - + x_scale[current_index] + start_offset) / 2) + text_y_pos = ax.get_ylim()[0] + (ax.get_ylim()[1] * 0.05) + # add cell index to plot + ax.text(text_x_pos, text_y_pos, "#" + str(cell_idx), style='italic', + bbox={ + 'facecolor': 'lightgrey', 'alpha': 0.5, 'pad': 2} + ) + current_index = target_index + start_offset = transition_offset def plot_with_dropdowns(metrics, perfdata, metric_start, time_indices=None): # Create subplots in a 1x2 grid fig, axes = plt.subplots(1, 2, figsize=(10, 3)) dropdowns = [] - - color = [ - "#" + ''.join([random.choice('0123456789ABCDEF') for j in 
range(6)]) - for i in range(len(time_indices[0]))] + color=None + if time_indices: + color = [ + "#" + ''.join( + [random.choice('0123456789ABCDEF') for j in range(6)]) + for i in range(len(time_indices[0]))] # Plot data and create dropdowns for each subplot for i, ax in enumerate(axes): From 119756ac5215019ffbdaf7e36a4417b7f5bd53b7 Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Fri, 23 Aug 2024 19:01:31 +0200 Subject: [PATCH 02/13] add support for multi cell mode colorization in plot --- src/jumper/kernel.py | 85 ++++++++++++++++++++++++++++++----- src/jumper/perfdatahandler.py | 18 ++++++-- src/jumper/visualization.py | 9 ++-- 3 files changed, 94 insertions(+), 18 deletions(-) diff --git a/src/jumper/kernel.py b/src/jumper/kernel.py index 81f3eb9..e2a675d 100644 --- a/src/jumper/kernel.py +++ b/src/jumper/kernel.py @@ -80,8 +80,8 @@ def __init__(self, **kwargs): self.mode = KernelMode.DEFAULT - self.multicell_cellcount = 0 - self.multicell_code = "" + self.multicell_cellcount = -1 + self.multicell_code = "import time\n" self.writefile_base_name = "jupyter_to_script" self.writefile_bash_name = "" @@ -198,8 +198,9 @@ def set_scorep_env(self, code): """ if self.mode == KernelMode.DEFAULT: for scorep_param in code.split("\n")[1:]: - key, val = scorep_param.split("=") - self.scorep_env[key] = val + if not scorep_param == "": + key, val = scorep_param.split("=") + self.scorep_env[key] = val self.cell_output( "Score-P environment set successfully: " + str(self.scorep_env) ) @@ -264,6 +265,7 @@ def append_multicellmode(self, code): f"print('Executing cell {self.multicell_cellcount}')\n" + f"print('''{code}''')\n" + f"print('-' * {max_line_len})\n" + + f"print('MCM_TS'+str(time.time()))\n" + f"{code}\n" + "print('''\n''')\n" ) @@ -279,8 +281,8 @@ def abort_multicellmode(self): """ if self.mode == KernelMode.MULTICELL: self.mode = KernelMode.DEFAULT - self.multicell_code = "" - self.multicell_cellcount = 0 + self.multicell_code = "import time\n" + 
self.multicell_cellcount = -1 self.cell_output("Multicell mode aborted.") else: self.cell_output( @@ -705,6 +707,8 @@ async def scorep_execute( # Empty cell output, required for interactive output # e.g. tqdm for-loop progress bar self.cell_output("\0") + + multicellmode_timestamps = [] while True: chunk = b"" + proc.stdout.read(READ_CHUNK_SIZE) if chunk == b"": @@ -718,8 +722,49 @@ async def scorep_execute( else: incomplete_line = "" for line in lines: + if "MCM_TS" in line: + multicellmode_timestamps.append(line) + continue self.cell_output(line) + # for multiple nodes, we have to add more lists here, one list per node + # this is required to be in line with the performance data aggregation + # for the %%display_graph_for_all magic command, which does not have + # explicit timestamps, but aligns the colorization of the plot based + # on the number of perf measurements we have, which is individual per + # node + time_indices = None + if len(multicellmode_timestamps): + # retrieve the index this cell will have in the global history + sub_idx = len(self.perfdata_handler.get_code_history()) + # append to have end of last code fragment + multicellmode_timestamps.append("MCM_TS"+str(time.time())) + time_indices = [[]] + nb_ms = 0.0 + for idx, ts_string in enumerate(multicellmode_timestamps[:-1]): + secs = (float(multicellmode_timestamps[idx+1][6:]) - + float(ts_string[6:])) + nb_ms += (secs / + int(os.environ.get("JUMPER_REPORT_FREQUENCY", 2))) + if nb_ms >= 1.0: + # only consider if we have measurements + time_indices[0].append((str(sub_idx)+"_"+str(idx), nb_ms)) + nb_ms %= 1.0 + # add time for last to last measurement + if nb_ms >= 0.0: + sub_idx, val = time_indices[0][-1] + time_indices[0][-1] = (sub_idx, val + nb_ms) + + nb_ms = 0.0 + for idx, val in enumerate(time_indices[0]): + sub_idx, ms = val + nb_ms += ms % 1.0 + ms = int(ms) + if nb_ms >= 1.0: + ms += 1 + nb_ms %= 1.0 + time_indices[0][idx] = (sub_idx, ms) + performance_data_nodes, duration = ( 
self.perfdata_handler.end_perfmonitor() ) @@ -811,7 +856,8 @@ async def scorep_execute( self.pershelper.postprocess() if performance_data_nodes: self.report_perfdata(performance_data_nodes, duration) - self.perfdata_handler.append_code(datetime.datetime.now(), code) + self.perfdata_handler.append_code(datetime.datetime.now(), code, + time_indices) return self.standard_reply() async def do_execute( @@ -856,10 +902,21 @@ async def do_execute( return self.standard_reply() """ if code.startswith("%%display_graph_for_last"): + if not len(self.perfdata_handler.get_perfdata_history()): + self.cell_output( + "No performance data available." + ) + time_indices = self.perfdata_handler.get_time_indices()[-1] + if time_indices: + sub_idxs = [x[0] for x in time_indices[0]] + self.cell_output(f"Cell seemed to be tracked in multi cell" + " mode. Got performance data for the" + f"following sub cells: {sub_idxs}") perfvis.draw_performance_graph( self.nodelist, self.perfdata_handler.get_perfdata_history()[-1], self.gpu_avail, + time_indices ) return self.standard_reply() elif code.startswith("%%display_graph_for_index"): @@ -876,10 +933,17 @@ async def do_execute( + " cells. This index is not available." ) else: + time_indices = self.perfdata_handler.get_time_indices()[index] + if time_indices: + sub_idxs = [x[0] for x in time_indices[0]] + self.cell_output(f"Cell seemed to be tracked in multi cell" + " mode. Got performance data for the" + f"following sub cells: {sub_idxs}") perfvis.draw_performance_graph( self.nodelist, self.perfdata_handler.get_perfdata_history()[index], self.gpu_avail, + time_indices ) return self.standard_reply() elif code.startswith("%%display_graph_for_all"): @@ -986,7 +1050,8 @@ async def do_execute( return self.set_scorep_pythonargs(code) elif code.startswith("%%serializer_settings"): self.cell_output( - "Deprecated. Use: %%marshalling_settings\n[MARSHALLER=]\n[MODE=]", + "Deprecated. 
Use: %%marshalling_settings\n[MARSHALLER=]\n[" + "MODE=]", "stdout", ) return self.standard_reply() @@ -1015,8 +1080,8 @@ async def do_execute( "KernelError: Multicell execution failed.", "stderr" ) return self.standard_reply() - self.multicell_code = "" - self.multicell_cellcount = 0 + self.multicell_code = "import time\n" + self.multicell_cellcount = -1 return reply_status elif self.mode == KernelMode.WRITEFILE: self.writefile_multicell = False diff --git a/src/jumper/perfdatahandler.py b/src/jumper/perfdatahandler.py index da43da8..8f38af9 100644 --- a/src/jumper/perfdatahandler.py +++ b/src/jumper/perfdatahandler.py @@ -27,8 +27,9 @@ class PerformanceDataHandler: def __init__(self): self.code_history = [] self.performance_data_history = [] + self.time_indices = [] self.nodelist = None - # for local it's none, otherwise points to slurm/ssh/... monitor + # for local, it's none, otherwise points to slurm/ssh/... monitor self.monitor_module = None # the object from the monitor module self.monitor = None @@ -77,8 +78,12 @@ def get_perfdata_history(self): def get_code_history(self): return self.code_history - def append_code(self, time, code): - self.code_history.append([time, code]) + def get_time_indices(self): + return self.time_indices + + def append_code(self, time_, code, time_indices=None): + self.code_history.append([time_, code]) + self.time_indices.append(time_indices) def get_perfdata_aggregated(self): perfdata_aggregated = [] @@ -156,7 +161,12 @@ def get_perfdata_aggregated(self): # add cell index and the number of measurements # we will use that in the visualization to generate # a color transition in the graphs and add the cell index - time_indices[node].append((idx, len(perfdata[node][2]))) + if self.time_indices[idx]: + # for cells tracked in multi cell mode, we can use the sub + # indices created + time_indices[node].extend(self.time_indices[idx][node]) + else: + time_indices[node].append((idx, len(perfdata[node][2]))) return perfdata_aggregated, 
time_indices diff --git a/src/jumper/visualization.py b/src/jumper/visualization.py index efe4669..d3d2806 100644 --- a/src/jumper/visualization.py +++ b/src/jumper/visualization.py @@ -158,11 +158,12 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None): start_offset = 0 last_idx = time_indices[0][-1][0] - for cell_idx, n_ms in time_indices[0]: - + # for multi cell mode, we might have sub indices + for cell_idx, values in enumerate(time_indices[0]): + sub_idx, n_ms = values target_index = target_index + n_ms # don't use offset for last cell - if cell_idx == last_idx: + if sub_idx == last_idx: transition_offset = 0 ax.axvspan(x_scale[current_index] + start_offset, x_scale[target_index] + @@ -175,7 +176,7 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None): text_y_pos = ax.get_ylim()[0] + (ax.get_ylim()[1] * 0.05) # add cell index to plot - ax.text(text_x_pos, text_y_pos, "#" + str(cell_idx), style='italic', + ax.text(text_x_pos, text_y_pos, "#" + str(sub_idx), style='italic', bbox={ 'facecolor': 'lightgrey', 'alpha': 0.5, 'pad': 2} ) From ad9aaef8add24960688b563794192c7dfaa25894 Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Fri, 23 Aug 2024 19:13:18 +0200 Subject: [PATCH 03/13] add code4history as a basis for multicellmode code history add execute with scorep to code history again fix spelling --- src/jumper/kernel.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/jumper/kernel.py b/src/jumper/kernel.py index e2a675d..ac68252 100644 --- a/src/jumper/kernel.py +++ b/src/jumper/kernel.py @@ -604,6 +604,7 @@ def report_perfdata(self, performance_data_nodes, duration): async def scorep_execute( self, code, + code4history, silent, store_history=True, user_expressions=None, @@ -856,8 +857,8 @@ async def scorep_execute( self.pershelper.postprocess() if performance_data_nodes: self.report_perfdata(performance_data_nodes, duration) - self.perfdata_handler.append_code(datetime.datetime.now(), 
code, - time_indices) + self.perfdata_handler.append_code(datetime.datetime.now(), + code4history, time_indices) return self.standard_reply() async def do_execute( @@ -911,7 +912,7 @@ async def do_execute( sub_idxs = [x[0] for x in time_indices[0]] self.cell_output(f"Cell seemed to be tracked in multi cell" " mode. Got performance data for the" - f"following sub cells: {sub_idxs}") + f" following sub cells: {sub_idxs}") perfvis.draw_performance_graph( self.nodelist, self.perfdata_handler.get_perfdata_history()[-1], @@ -938,7 +939,7 @@ async def do_execute( sub_idxs = [x[0] for x in time_indices[0]] self.cell_output(f"Cell seemed to be tracked in multi cell" " mode. Got performance data for the" - f"following sub cells: {sub_idxs}") + f" following sub cells: {sub_idxs}") perfvis.draw_performance_graph( self.nodelist, self.perfdata_handler.get_perfdata_history()[index], @@ -1067,7 +1068,9 @@ async def do_execute( if self.mode == KernelMode.MULTICELL: self.mode = KernelMode.DEFAULT try: + # second multicell_code should be cleaned for code history reply_status = await self.scorep_execute( + self.multicell_code, self.multicell_code, silent, store_history, @@ -1098,8 +1101,11 @@ async def do_execute( return self.end_writefile() elif code.startswith("%%execute_with_scorep"): if self.mode == KernelMode.DEFAULT: + # second code argument is for history purposes, we want to keep + # everything return await self.scorep_execute( code.split("\n", 1)[1], + code, silent, store_history, user_expressions, From 3c666b781c866ad20e24334e290a898890abf0cc Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Mon, 26 Aug 2024 10:48:05 +0200 Subject: [PATCH 04/13] add code history parameter for multi cell mode for a more tidy code history add time indices to export (for multi cell mode) --- src/jumper/kernel.py | 53 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/src/jumper/kernel.py b/src/jumper/kernel.py index ac68252..3f0b2bd 
100644 --- a/src/jumper/kernel.py +++ b/src/jumper/kernel.py @@ -82,6 +82,7 @@ def __init__(self, **kwargs): self.multicell_cellcount = -1 self.multicell_code = "import time\n" + self.multicell_code_history = "" self.writefile_base_name = "jupyter_to_script" self.writefile_bash_name = "" @@ -269,6 +270,10 @@ def append_multicellmode(self, code): + f"{code}\n" + "print('''\n''')\n" ) + self.multicell_code_history += ( + f"###User code for sub cell {self.multicell_cellcount}\n" + + f"print('''{code}''')\n" + ) self.cell_output( f"Cell marked for multicell mode. It will be executed at " f"position {self.multicell_cellcount}" @@ -282,6 +287,7 @@ def abort_multicellmode(self): if self.mode == KernelMode.MULTICELL: self.mode = KernelMode.DEFAULT self.multicell_code = "import time\n" + self.multicell_code_history = "" self.multicell_cellcount = -1 self.cell_output("Multicell mode aborted.") else: @@ -604,7 +610,7 @@ def report_perfdata(self, performance_data_nodes, duration): async def scorep_execute( self, code, - code4history, + code_for_history, silent, store_history=True, user_expressions=None, @@ -858,7 +864,7 @@ async def scorep_execute( if performance_data_nodes: self.report_perfdata(performance_data_nodes, duration) self.perfdata_handler.append_code(datetime.datetime.now(), - code4history, time_indices) + code_for_history, time_indices) return self.standard_reply() async def do_execute( @@ -889,8 +895,8 @@ async def do_execute( %%display_graphs_for_last cpu_util, ... # displays one graph for all cell, arguments: cpu_util etc. %%display_graphs_for_all cpu_util, ... - # -> would be cool if we can hover the graph and per timepoint, - we see the index of the cell + # -> displays performance data aggregated, indicates different cells + # by color # displays graph for index cell, arguments: cpu_util etc. %%display_graphs_for_index i cpu_util, ... 
""" @@ -995,23 +1001,45 @@ async def do_execute( elif code.startswith("%%perfdata_to_variable"): if len(code.split(" ")) == 1: self.cell_output( - "No variable to export specified. Use: " + "No variable for export specified. Use: " "%%perfdata_to_variable myvar", "stdout", ) else: varname = code.split(" ")[1] - await super().do_execute( - f"{varname}=" - f"{self.perfdata_handler.get_perfdata_history()}", - silent=True, - ) + # for multi cell mode, we might have time indices which we want + # to communicate to the user, each time_index has the index of + # the overall mult cell and the sub index within this cell. + # In addition, it has a counter for the number of measurements + # each sub cell corresponds to in the list of performance data + # measurements, e.g. (2_0, 5), (2_1, 3), (2_2, 7) + mcm_time_indices = self.perfdata_handler.get_time_indices() + mcm_time_indices = list( + filter(lambda item: item is not None, mcm_time_indices)) + + code = (f"{varname}=" + f"{self.perfdata_handler.get_perfdata_history()}") + + if mcm_time_indices: + code += f"\n{varname}.append({mcm_time_indices})" + + await super().do_execute(code,silent=True) self.cell_output( "Exported performance data to " + str(varname) - + " variable", + + " variable. ", "stdout", ) + if mcm_time_indices: + self.cell_output( + "Detected that cells were executed in multi cell mode." + + f"Last entry in {varname} is a list that contains " + f"the sub indices per cell that were executed in " + f"in multi cell mode and a counter for the number of" + f" performance measurements within this sub cell, " + f"e.g. 
f{mcm_time_indices[-1]}", + "stdout", + ) return self.standard_reply() elif code.startswith("%%perfdata_to_json"): if len(code.split(" ")) == 1: @@ -1071,7 +1099,7 @@ async def do_execute( # second multicell_code should be cleaned for code history reply_status = await self.scorep_execute( self.multicell_code, - self.multicell_code, + self.multicell_code_history, silent, store_history, user_expressions, @@ -1084,6 +1112,7 @@ async def do_execute( ) return self.standard_reply() self.multicell_code = "import time\n" + self.multicell_code_history = "" self.multicell_cellcount = -1 return reply_status elif self.mode == KernelMode.WRITEFILE: From 3981edaaf71d753a0f2a0a510aaef0fda64e50b3 Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Wed, 20 Nov 2024 15:34:40 +0100 Subject: [PATCH 05/13] merge master and perf monitoring --- ExampleJUmPER.ipynb | 633 ------------------------ README.md | 49 +- examples/ExampleMonitoring.ipynb | 418 ++++++++++++++++ examples/ExampleNotebook.ipynb | 506 ------------------- examples/demonstrator.ipynb | 340 ------------- examples/fairytales_demo.txt | 136 ----- examples/gpt-demo/01-GPT-Training.ipynb | 39 +- src/jumper/kernel.py | 253 +++++----- src/jumper/userpersistence.py | 51 +- tests/kernel/notebook.ipynb | 306 +++++++++--- tests/kernel/persistence.yaml | 9 +- tests/kernel/scorep_env.yaml | 16 +- tests/kernel/writemode.yaml | 29 +- tests/test_kernel.py | 11 +- 14 files changed, 928 insertions(+), 1868 deletions(-) delete mode 100644 ExampleJUmPER.ipynb create mode 100644 examples/ExampleMonitoring.ipynb delete mode 100644 examples/ExampleNotebook.ipynb delete mode 100644 examples/demonstrator.ipynb delete mode 100644 examples/fairytales_demo.txt diff --git a/ExampleJUmPER.ipynb b/ExampleJUmPER.ipynb deleted file mode 100644 index f0fcb97..0000000 --- a/ExampleJUmPER.ipynb +++ /dev/null @@ -1,633 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "34fc5489-608e-434d-93c2-cefe6b33890f", - "metadata": {}, - "source": [ - "### 
News about PyPerf Jupyter Kernel (Performance Monitoring and Analysis in Jupyter)\n", - "- added metrics to display in Jupyter (mean/max/min, raw values) for: CPU,GPU,Mem\n", - "- use dropdown menus to select metrics (interactively)\n", - "- use in-memory serialization instead of disk-based for Score-P execution\n", - "- modular system for backbone serializer (dill/cloudpickle)\n", - "\n", - "---" - ] - }, - { - "cell_type": "markdown", - "id": "539781f0-7e0c-4988-b5c1-8a369a6de653", - "metadata": {}, - "source": [ - "**Toy Example:** Estimating Pi by Throwing Darts" - ] - }, - { - "cell_type": "code", - "id": "a9e8242b-dd85-48dc-b67f-81b0926bf67e", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T11:44:11.335951Z", - "start_time": "2024-07-23T11:44:10.322227Z" - } - }, - "source": "%env JUMPER_REPORT_FREQUENCY=2", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "env: JUMPER_REPORT_FREQUENCY=2\n" - ] - } - ], - "execution_count": 5 - }, - { - "cell_type": "code", - "id": "2a10feac-0a7b-478a-ab36-71b3a1844f92", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T11:44:13.374568Z", - "start_time": "2024-07-23T11:44:12.366967Z" - } - }, - "source": "%env JUMPER_REPORTS_MIN=2", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "env: JUMPER_REPORTS_MIN=2\n" - ] - } - ], - "execution_count": 6 - }, - { - "cell_type": "code", - "id": "519f9bc9-d715-480d-ac6f-cd2771ded5f3", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T11:44:16.675947Z", - "start_time": "2024-07-23T11:44:15.670611Z" - } - }, - "source": [ - "from random import random\n", - "from time import time\n", - "from multiprocessing import Pool\n", - "import sys" - ], - "outputs": [], - "execution_count": 7 - }, - { - "cell_type": "code", - "id": "bbc9a5d8-bfcf-4810-b232-e49b4be5f1e6", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T11:44:17.688886Z", - "start_time": "2024-07-23T11:44:16.677748Z" - } - }, - 
"source": [ - "def throw_dart(iterations: int) -> int:\n", - " hits = 0\n", - " for i in range(iterations):\n", - " x = random()\n", - " y = random()\n", - " if (x * x) + (y * y) <= 1:\n", - " hits = hits + 1\n", - " return hits\n", - "\n", - "\n", - "def compute_pi(iterations, process_count):\n", - " pool = Pool(processes=process_count)\n", - " trials_per_process = [int(iterations / process_count)] * process_count\n", - "\n", - " start = time()\n", - "\n", - " hits = pool.map(throw_dart, trials_per_process)\n", - " pi = (sum(hits) * 4) / iterations\n", - "\n", - " end = time()\n", - "\n", - " print(pi)\n", - " print(f\"Execution time: {end - start} seconds.\")" - ], - "outputs": [], - "execution_count": 8 - }, - { - "cell_type": "code", - "id": "86b1962f-2053-4df6-b3fd-6f7e4fed48b3", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T11:45:27.484787Z", - "start_time": "2024-07-23T11:44:19.133262Z" - } - }, - "source": [ - "# 10⁹ iterations on 4 CPUs.on_click(partial(generate_plot, x=x, y=y))\n", - "compute_pi(10**9, 8)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3.141498828\n", - "Execution time: 67.2501654624939 seconds.\n", - "\n", - "----Performance Data----\n", - "Duration: 67.33\n", - "\n", - "CPU Util (Across CPUs) \tAVG: 99.80\t MIN: 76.50\t MAX: 100.00\n", - "Mem Util in GB (Across nodes)\tAVG: 11.18\t MIN: 10.85\t MAX: 11.36\n", - "IO Ops (excl.) Read \tTotal: 6269\n", - " Write \tTotal: 3565\n", - "IO Bytes (excl.) 
Read \tTotal: 0.16\n", - " Write \tTotal: 1.06\n" - ] - } - ], - "execution_count": 9 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T11:42:09.272677Z", - "start_time": "2024-07-23T11:42:08.265658Z" - } - }, - "cell_type": "code", - "source": "%env PARALLEL_MARSHALL_DEBUG=10", - "id": "147fdd845d36eeaf", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "env: PARALLEL_MARSHALL_DEBUG=10\n" - ] - } - ], - "execution_count": 1 - }, - { - "cell_type": "code", - "id": "5c8669db-5262-40dc-bb43-54a71f562966", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T12:18:44.235036Z", - "start_time": "2024-07-23T12:18:44.231070Z" - } - }, - "source": [ - "%%serializer_settings\n", - "MODE=memory\n", - "SERIALIZER=parallel_marshall" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Kernel uses 'parallel_marshall' serializer in 'memory' mode." - ] - } - ], - "execution_count": 1 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T12:18:45.241643Z", - "start_time": "2024-07-23T12:18:44.236728Z" - } - }, - "cell_type": "code", - "source": "a=2", - "id": "971d897afb7d0131", - "outputs": [], - "execution_count": 2 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T12:18:47.701712Z", - "start_time": "2024-07-23T12:18:45.245174Z" - } - }, - "cell_type": "code", - "source": [ - "%%execute_with_scorep\n", - "print(a)" - ], - "id": "9791749f398efefe", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u00002\n", - "Instrumentation results can be found in /home/eliasw/scorep_jupyter_kernel_python/scorep-20240723_1418_17044002454007\n", - "1418\n", - "\n", - "1418\n" - ] - } - ], - "execution_count": 3 - }, - { - "cell_type": "code", - "id": "b3b5516b-be9c-49e0-8394-3c9fff3943de", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-23T11:46:43.484312Z", - "start_time": "2024-07-23T11:46:42.943618Z" - } - }, - "source": [ - 
"%%display_graph_for_last" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "Button(description='Add Display', style=ButtonStyle())" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "0f5ec8888ff44d06a10ef232a2a7982a" - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "Output()" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "8098eb63b6524a26931fa7a21a29acf8" - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "HBox(children=(Dropdown(description='Metric:', options=('CPU Usage (Min/Max/Mean)', 'CPU Usage (Raw)', 'Mem in…" - ], - "application/vnd.jupyter.widget-view+json": { - "version_major": 2, - "version_minor": 0, - "model_id": "5a544414f17c4bfbb5994e4b11816d4c" - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "
" - ], - "image/png": "" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "execution_count": 10 - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "1f808f90-0e37-469e-b568-178d84e3e504", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "82e625e63aa14c4ea99c6a881e166c8f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Button(description='Add Display', style=ButtonStyle())" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e612fe6d0fc64f82b8e2b9d6b5f006f0", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "33e817abcb4e42949e7521ee326298af", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(Dropdown(description='Metric:', options=('CPU Util (Min/Max/Mean)', 'CPU Cores (Raw)', 'Mem', '…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ef836a276959448aa6a29c3a116ca8dd", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%%display_graph_for_index 0" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "f5368a55-4cd0-437c-a38b-1557a5dbdd88", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3d6ffd83e6624207aeff591b601c65ca", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Button(description='Add Display', style=ButtonStyle())" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f957405cd8d242d285264336f1fe5498", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a8c296952d4d4d879add40a58c229645", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(Dropdown(description='Metric:', options=('CPU Util (Min/Max/Mean)', 'CPU Cores (Raw)', 'Mem', '…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3e1f389785e84f9f92edce112f0b6020", - "version_major": 2, - "version_minor": 0 - }, - "image/png": "", - "text/html": [ - "\n", - "
\n", - "
\n", - " Figure\n", - "
\n", - " \n", - "
\n", - " " - ], - "text/plain": [ - "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%%display_graph_for_all" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "4336cf63-51b4-479b-9b5a-e5527788ffde", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
indextimestampcode
Loading... (need help?)
\n", - "\n", - "\n", - "
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%%display_code_history" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "63be1a14-4659-47d7-9358-c2d435d46bee", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cell timestamp: 2024-05-11 00:31:34.607913\n", - "--\n", - "# 10⁹ iterations on 4 CPUs.on_click(partial(generate_plot, x=x, y=y))\n", - "compute_pi(10**9, 8)" - ] - } - ], - "source": [ - "%%display_code_for_index 0" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "3ddda407-4989-493d-90ba-dbb89d0841f8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Exported performance data to myvar variable" - ] - } - ], - "source": [ - "%%perfdata_to_variable myvar" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "d1c55854-ce4c-4f5d-869a-9f920be6645b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Exported performance data to myfile_perfdata.json and myfile_code.json" - ] - } - ], - "source": [ - "%%perfdata_to_json myfile" - ] - }, - { - "cell_type": "markdown", - "id": "ed4972cf-90c8-4ad8-81d8-b79dccad600b", - "metadata": {}, - "source": [ - "---\n", - "**Plans:**\n", - "- retrieve metrics on multiple nodes (add network, psutil delivers that)\n", - "- parallel serialization (for scorep)\n", - "- show index as thumbnail or so when hovering the graphs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29194d3f-ff1f-48a2-9a78-af8192b9dae8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "JUmPER", - "language": "python", - "name": "jumper" - }, - "language_info": { - "file_extension": ".py", - "mimetype": "text/plain", - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/README.md 
b/README.md index b6a6859..6c7c05f 100644 --- a/README.md +++ b/README.md @@ -20,18 +20,23 @@ For binding to Score-P, the kernel uses the [Score-P Python bindings](https://gi # Table of Content -- [Installation](#Installation) -- [Usage](#Usage) - * [Monitoring](#Monitoring) - * [Score-P Instrumentation](#Score-P-Instrumentation) - * [Multi-Cell Mode](#Multi-Cell-Mode) - * [Write Mode](#Write-Mode) -- [Presentation of Performance Data](#Presentation-of-Performance-Data) -- [Limitations](#Limitations) -- [Future Work](#Future-Work) -- [Citing](#Citing) -- [Contact](#Contact) -- [Acknowledgments](#Acknowledgments) +- [A Jupyter Kernel for Performance Engineering](#a-jupyter-kernel-for-performance-engineering) +- [Table of Content](#table-of-content) +- [Installation](#installation) +- [Usage](#usage) + - [Monitoring](#monitoring) + - [Score-P Instrumentation](#score-p-instrumentation) + - [Configuring Score-P in Jupyter](#configuring-score-p-in-jupyter) + - [Multi-Cell Mode](#multi-cell-mode) + - [Write Mode](#write-mode) +- [Presentation of Performance Data](#presentation-of-performance-data) +- [Limitations](#limitations) + - [Serialization Type Support](#serialization-type-support) + - [Overhead](#overhead) +- [Future Work](#future-work) +- [Citing](#citing) +- [Contact](#contact) +- [Acknowledgments](#acknowledgments) # Installation @@ -123,11 +128,12 @@ Exports the performance data and the code to json files. ### Configuring Score-P in Jupyter -`%%scorep_env` - -Set up your Score-P environment. For a documentation of Score-P environment variables, see: [Score-P Measurement Configuration](https://perftools.pages.jsc.fz-juelich.de/cicd/scorep/tags/latest/html/scorepmeasurementconfig.html). 
- -![](doc/scorep_setup.png) +Set up your Score-P environment with `%env` line magic, e.g.: +``` +%env SCOREP_ENABLE_TRACING=1 +%env SCOREP_TOTAL_MEMORY=3g +``` +For documentation of the Score-P environment variables, see: [Score-P Measurement Configuration](https://perftools.pages.jsc.fz-juelich.de/cicd/scorep/tags/latest/html/scorepmeasurementconfig.html). `%%scorep_python_binding_arguments` @@ -198,7 +204,7 @@ Enables the write mode and starts the marking process. Subsequently, "running" c Stops the marking process and writes the marked cells in a Python script. Additionally, a bash script will be created for setting the Score-P environment variables, Pyhton bindings arguments and executing the Python script. **Hints**: -- Recording a cell containing `%%scorep_env` or `%%scorep_python_binding_arguments` will add the environment variables/Score-P Python bindings to the bash script. +- Recording a cell containing `%%scorep_python_binding_arguments` will add the Score-P Python bindings arguments to the bash script. - Code of a cell which is not to be executed with Score-P (not inside the multicell mode and without `%%execute_with_scorep`) will be framed with `with scorep.instrumenter.disable()` in the Python script to prevent instrumentation. @@ -223,7 +229,7 @@ For the execution of a cell, the kernel uses the default IPython kernel. For a c > `dill` cannot yet pickle these standard types: > frame, generator, traceback -Similar yields for cloudpickle. Use the `%%serializer_settings` magic command to switch between both depending on your needs. +The same applies to cloudpickle. Use the `%%marshalling_settings` magic command to switch between both depending on your needs. ## Overhead @@ -233,9 +239,8 @@ When dealing with big data structures, there might be a big runtime overhead at The kernel is still under development. 
The following is on the agenda: - - Check alternative Python implementations (Stackless/PyPy) for better serialization support - - Performance data visualizations - - Overhead reduction (we already implemented in-memory communication for persistence handling and plan to have a parallel serializer) + - Provide perfmonitors for multi node setups + - Config for default perfmonitor to define collected metrics PRs are welcome. diff --git a/examples/ExampleMonitoring.ipynb b/examples/ExampleMonitoring.ipynb new file mode 100644 index 0000000..26de264 --- /dev/null +++ b/examples/ExampleMonitoring.ipynb @@ -0,0 +1,418 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "34fc5489-608e-434d-93c2-cefe6b33890f", + "metadata": {}, + "source": [ + "### News about PyPerf Jupyter Kernel (Performance Monitoring and Analysis in Jupyter)\n", + "- added metrics to display in Jupyter (mean/max/min, raw values) for: CPU,GPU,Mem\n", + "- use dropdown menus to select metrics (interactively)\n", + "- use in-memory serialization instead of disk-based for Score-P execution\n", + "- modular system for backbone serializer (dill/cloudpickle)\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "539781f0-7e0c-4988-b5c1-8a369a6de653", + "metadata": {}, + "source": [ + "**Toy Example:** Estimating Pi by Throwing Darts" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a9e8242b-dd85-48dc-b67f-81b0926bf67e", + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-14T16:17:42.040767Z", + "start_time": "2024-08-14T16:17:41.030411Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: JUMPER_REPORT_FREQUENCY=2\n" + ] + } + ], + "source": [ + "%env JUMPER_REPORT_FREQUENCY=2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2a10feac-0a7b-478a-ab36-71b3a1844f92", + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-14T08:42:57.570647Z", + "start_time": "2024-08-14T08:42:56.565047Z" + } + }, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: JUMPER_REPORTS_MIN=2\n" + ] + } + ], + "source": [ + "%env JUMPER_REPORTS_MIN=2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3bc9bbd5-4220-4361-afa0-e01bb5031996", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: SCOREP_ENABLE_TRACING=1\n", + "env: SCOREP_ENABLE_PROFILING=0\n", + "env: SCOREP_TOTAL_MEMORY=3g\n" + ] + } + ], + "source": [ + "%env SCOREP_ENABLE_TRACING=1\n", + "%env SCOREP_ENABLE_PROFILING=0\n", + "%env SCOREP_TOTAL_MEMORY=3g" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "519f9bc9-d715-480d-ac6f-cd2771ded5f3", + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-14T08:42:58.580020Z", + "start_time": "2024-08-14T08:42:57.573894Z" + } + }, + "outputs": [], + "source": [ + "from random import random\n", + "import time\n", + "from multiprocessing import Pool\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "bbc9a5d8-bfcf-4810-b232-e49b4be5f1e6", + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-14T08:42:59.590691Z", + "start_time": "2024-08-14T08:42:58.581417Z" + } + }, + "outputs": [], + "source": [ + "def throw_dart(iterations: int) -> int:\n", + " hits = 0\n", + " for i in range(iterations):\n", + " x = random()\n", + " y = random()\n", + " if (x * x) + (y * y) <= 1:\n", + " hits = hits + 1\n", + " return hits\n", + "\n", + "\n", + "def compute_pi(iterations, process_count):\n", + " pool = Pool(processes=process_count)\n", + " trials_per_process = [int(iterations / process_count)] * process_count\n", + "\n", + " start = time.time()\n", + "\n", + " hits = pool.map(throw_dart, trials_per_process)\n", + " pi = (sum(hits) * 4) / iterations\n", + "\n", + " end = time.time()\n", + "\n", + " print(pi)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "86b1962f-2053-4df6-b3fd-6f7e4fed48b3", + "metadata": { 
+ "ExecuteTime": { + "end_time": "2024-08-14T08:46:45.581231Z", + "start_time": "2024-08-14T08:45:44.660151Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.14178844\n", + "\n", + "----Performance Data----\n", + "Duration: 15.69\n", + "\n", + "CPU Util (Across CPUs) \tAVG: 53.33\t MIN: 0.50\t MAX: 100.00\n", + "Mem Util in GB (Across nodes)\tAVG: 13.49\t MIN: 13.49\t MAX: 13.50\n", + "IO Ops (excl.) Read \tTotal: 3784\n", + " Write \tTotal: 408\n", + "IO Bytes (excl.) Read \tTotal: 0.19\n", + " Write \tTotal: 0.29\n" + ] + } + ], + "source": [ + "# 10⁸ iterations on 2 CPUs\n", + "compute_pi(10**8, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "47752868-6f55-4e03-be46-e14e4f83e11a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.14178776\n", + "\n", + "----Performance Data----\n", + "Duration: 7.59\n", + "\n", + "CPU Util (Across CPUs) \tAVG: 99.55\t MIN: 85.70\t MAX: 100.00\n", + "Mem Util in GB (Across nodes)\tAVG: 13.50\t MIN: 13.49\t MAX: 13.51\n", + "IO Ops (excl.) Read \tTotal: 4123\n", + " Write \tTotal: 494\n", + "IO Bytes (excl.) 
Read \tTotal: 0.19\n", + " Write \tTotal: 0.32\n" + ] + } + ], + "source": [ + "# 10⁸ iterations on 8 CPUs\n", + "compute_pi(10**8, 8)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "461b590d-b319-4172-b95d-91c7989a8a5e", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ce5147eac02741ffbc85924f983349d2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Button(description='Add Display', style=ButtonStyle())" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b06600309c39418598b96aaad1bdca00", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "866be87a83a548e0a61b66c0ecf08e18", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(Dropdown(description='Metric:', options=('CPU Usage (Min/Max/Mean)', 'CPU Usage (Raw)', 'Mem in…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1b852b9508984362a9c2a4731cae85b5", + "version_major": 2, + "version_minor": 0 + }, + "image/png": "", + "text/html": [ + "\n", + "
\n", + "
\n", + " Figure\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%display_graph_for_all" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4336cf63-51b4-479b-9b5a-e5527788ffde", + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-14T08:48:12.747027Z", + "start_time": "2024-08-14T08:48:12.735846Z" + } + }, + "outputs": [], + "source": [ + "%%display_code_history" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63be1a14-4659-47d7-9358-c2d435d46bee", + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-14T08:45:20.906658Z", + "start_time": "2024-08-14T08:45:20.904172Z" + } + }, + "outputs": [], + "source": [ + "%%display_code_for_index 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8dde35f2-a3ec-4278-9188-d7be10cf0cc0", + "metadata": {}, + "outputs": [], + "source": [ + "%%display_code_for_index 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d20c7377-6c42-410d-b8f7-6bd83192dc05", + "metadata": {}, + "outputs": [], + "source": [ + "%%display_graph_for_index 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3b5516b-be9c-49e0-8394-3c9fff3943de", + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-14T08:46:50.316954Z", + "start_time": "2024-08-14T08:46:50.041909Z" + } + }, + "outputs": [], + "source": [ + "%%display_graph_for_last" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ddda407-4989-493d-90ba-dbb89d0841f8", + "metadata": {}, + "outputs": [], + "source": [ + "%%perfdata_to_variable myvar" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10fbf6a8-760b-4d72-b520-03b0346930b8", + "metadata": {}, + "outputs": [], + "source": [ + "myvar" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"d1c55854-ce4c-4f5d-869a-9f920be6645b", + "metadata": {}, + "outputs": [], + "source": [ + "%%perfdata_to_json myfile" + ] + }, + { + "cell_type": "markdown", + "id": "ed4972cf-90c8-4ad8-81d8-b79dccad600b", + "metadata": {}, + "source": [ + "---\n", + "**Plans:**\n", + "- retrieve metrics on multiple nodes (add network, psutil delivers that)\n", + "- parallel serialization (for scorep)\n", + "- show index as thumbnail or so when hovering the graphs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29194d3f-ff1f-48a2-9a78-af8192b9dae8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "JUmPER", + "language": "python", + "name": "jumper" + }, + "language_info": { + "file_extension": ".py", + "mimetype": "text/plain", + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/ExampleNotebook.ipynb b/examples/ExampleNotebook.ipynb deleted file mode 100644 index d068b6b..0000000 --- a/examples/ExampleNotebook.ipynb +++ /dev/null @@ -1,506 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The Score-P Python Kernel\n", - "This is the Score-P Python Kernel that allows you to execute Jupyter Notebooks with Score-P for performance analysis. It supports the usual Jupyter interactivity between cells though with some limitations (see **General Limitations**).\n", - "\n", - "The kernel requires [Score-P](https://www.vi-hps.org/projects/score-p/) and [Score-P Python bindings](https://github.com/score-p/scorep_binding_python) to be installed." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Setup\n", - "You can set up your Score-P environment by executing a cell that starts with the `%%scorep_env magic command`.\n", - "\n", - "You can set the Score-P Python binding arguments by executing a cell that starts with `%%scorep_python_binding_arguments`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "%%scorep_env\n", - "SCOREP_ENABLE_TRACING=1\n", - "SCOREP_ENABLE_PROFILING=0\n", - "SCOREP_TOTAL_MEMORY=3g" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "%%scorep_python_binding_arguments\n", - "--noinstrumenter" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Cells instrumentation\n", - "\n", - "Cells that should be executed with Score-P have to be marked with `%%execute_with_scorep` in the first line. Cells without that command are executed as ordinary Python processes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "%%execute_with_scorep\n", - "import scorep\n", - "class A:\n", - " desc = \"This class and method should be...\"\n", - " def print_desc(self, x):\n", - " print(self.desc + str(x))\n", - "\n", - "a = A()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "a.print_desc(\"known here\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "a.desc = \"new desc\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "print(a.desc)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "%%execute_with_scorep\n", - "import scorep\n", - "with scorep.instrumenter.enable():\n", - " a.desc = \"new desc2\"" - ] - 
}, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "print(a.desc)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "%%execute_with_scorep\n", - "import scorep\n", - "import time\n", - "\n", - "def sleep_and_double(x):\n", - " time.sleep(x)\n", - " return 2*x\n", - "\n", - "with scorep.instrumenter.enable():\n", - " x = 5\n", - " x = sleep_and_double(x)\n", - " x = sleep_and_double(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "print(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Multicell mode\n", - "You can also treat multiple cells as one single cell by using the multicell mode.\n", - "\n", - "For that, you can mark the cells in the order you wish to execute them. Start the marking process by a cell that starts with the `%%enable_multicellmode` command.\n", - "\n", - "Now mark your cells by running them. Note that the cells will not be executed at this point but will be marked for later execution.\n", - "You can stop the marking and execute all the marked cells by running a cell that starts with `%%finalize_multicellmode` command.\n", - "This will execute all the marked cells orderly with Score-P. Note that the `%%execute_with_scorep` command has no effect in the multi cell mode.\n", - "\n", - "There is no \"unmark\" command available but you can abort the multicellmode by the `%%abort_multicellmode` command. Start your marking process again if you have marked your cells in the wrong order.\n", - "\n", - "The `%%enable_multicellmode`, `%%finalize_multicellmode` and `%%abort_multicellmode` commands should be run in an exclusive cell. Additional code in the cell will be ignored." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "%%enable_multicellmode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "with scorep.instrumenter.enable():\n", - " class B:\n", - " desc = \"This is a class defined in multi cell mode\"\n", - " def print_desc(self, x):\n", - " print(self.desc + str(x))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "import scorep\n", - "with scorep.instrumenter.enable():\n", - " b = B()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "with scorep.instrumenter.enable():\n", - " b.print_desc(\"...and this object is initialized and used in it.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "b.desc = \"modified desc\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "print(b.desc)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "any text" - } - }, - "outputs": [], - "source": [ - "%%finalize_multicellmode" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Write mode\n", - "\n", - "With write mode you can convert notebook cells into Python script which is then to be executed by Score-P bindings using auxillary bash script. \n", - "\n", - "Similarly to multicell mode, you can run a cell with `%%start_writefile` magic command to enable write mode. 
Then, running the cells will record their contents instead of executing them. Environment variables and Score-P Python bindings arguments will be written to bash script. Finish the write mode with `%%end_writefile` cell.\n", - "\n", - "You can specify Python script name by providing it as an argument for `%%start_writefile`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "%%start_writefile myscript.py" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "%%scorep_env\n", - "SCOREP_ENABLE_TRACING=1\n", - "SCOREP_ENABLE_PROFILING=0\n", - "SCOREP_TOTAL_MEMORY=3g" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "%%scorep_python_binding_arguments\n", - "--noinstrumenter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "print(\"Cell without instrumentation.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "%%execute_with_scorep\n", - "\n", - "import numpy as np\n", - "import scorep\n", - "\n", - "a = np.array([1, 2, 3])\n", - "b = np.array([4, 5, 6])\n", - "c = a.dot(b)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "%%enable_multicellmode\n", - "\n", - "with scorep.instrumenter.enable():\n", - " d = a.outer(b)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "with 
scorep.instrumenter.enable():\n", - " e = b.outer(a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "%%abort_multicellmode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "%%finalize_multicellmode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "%%end_writefile" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can now run `myscript_run.sh` to execute Python script with Score-P bindings." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Presentation of Performance Data\n", - "\n", - "To inspect the collected performance data you can use tools such as [Vampir](https://vampir.eu/) (Trace) or [Cube](https://www.scalasca.org/software/cube-4.x/) (Profile).\n", - "\n", - "### Future Work\n", - "\n", - "The kernel is still under development. If you have any questions or wishes, please report to elias.werner@tu-dresden.de\n", - " \n", - "PRs are welcome.\n", - "\n", - "### General Limitations \n", - "\n", - "For the execution of a cell, the kernel starts a new Python process either with Score-P or standalone. The kernel handles persistency between these processes on its own. Therefore it uses pickle/shelve and additional techniques. However this comes with the following drawbacks:\n", - "\n", - "- when dealing with big data structures, there might be a big runtime overhead at the beginning and the end of a cell. This is due to additional data saving and loading processes for persistency in the background. However this does not affect the actual user code and the Score-P measurements.\n", - "- Pickle/Shelve can not handle each kind ob Python object (e.g. 
file handles, network connections,...). Thus, they can not be shared between cells and your notebook might not work.\n", - "- Pickle/Shelve does not store class information but gives a reference to the class when storing a class instance. Thus, overwriting classes differs from the ordinary Python way. E.g. if you define a class and an object of this class in one cell and overwrite the class in a different cell, the defined object will also be changed. So please avoid class overwriting." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "scorep-python", - "language": "python", - "name": "scorep-python" - }, - "language_info": { - "file_extension": ".py", - "mimetype": "text/plain", - "name": "Any text" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/demonstrator.ipynb b/examples/demonstrator.ipynb deleted file mode 100644 index dc8e010..0000000 --- a/examples/demonstrator.ipynb +++ /dev/null @@ -1,340 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# This is a demonstrator of the Score-P Python Kernel\n", - "This is the Score-P Python Kernel that allows you to execute Jupyter Notebooks with Score-P.\n", - "\n", - "The kernel supports the usual jupyter interactivity between cells but with some limitations (see \"General Limitations\").\n", - "\n", - "## Setup\n", - "You can set up your Score-P environment by executing a cell that starts with the %%scorep_env magic command.\n", - "\n", - "You can set the Score-P Python binding arguments by executing a cell that starts with %%scorep_python_binding_arguments.\n", - "\n", - "## Usage\n", - "Cells that should be executed with Score-P have to be marked with %%execute_with_scorep in the first line. 
Cells without that command are executed as ordinary Python processes.\n", - "\n", - "### Multi Cell Mode\n", - "You can also treat multiple cells as one single cell by using the multi cell mode.\n", - "\n", - "Therefore you can mark the cells in the order you wish to execute them. Start the marking process by a cell that starts with the %%enable_multicellmode command.\n", - "Now mark your cells by running them. Note that the cells will not be executed at this point but will be marked for later execution.\n", - "You can stop the marking and execute all the marked cells by running a cell that starts with %%finalize_multicellmode command.\n", - "This will execute all the marked cells orderly with Score-P. Note that the %%execute_with_scorep command has no effect in the multi cell mode.\n", - "\n", - "There is no \"unmark\" command available but you can abort the multicellmode by the %%abort_multicellmode command. Start your marking process again if you have marked your cells in the wrong order.\n", - "\n", - "The %%enable_multicellmode, %%finalize_multicellmode and %%abort_multicellmode commands should be run in an exclusive cell. Additional code in the cell will be ignored.\n", - "\n", - "### Presentation of Performance Data\n", - "\n", - "To inspect the collected performance data, use tools as Vampir (Trace) or Cube (Profile)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "# Set up SCORE-P Environment" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Score-P environment set successfully: {'SCOREP_ENABLE_TRACING': '1', 'SCOREP_ENABLE_PROFILING': '0', 'SCOREP_TOTAL_MEMORY': '3g'}" - ] - } - ], - "source": [ - "%%scorep_env\n", - "SCOREP_ENABLE_TRACING=1\n", - "SCOREP_ENABLE_PROFILING=0\n", - "SCOREP_TOTAL_MEMORY=3g" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Score-P Python binding arguments set successfully: ['--noinstrumenter', '--noinstrumenter']" - ] - } - ], - "source": [ - "%%scorep_python_binding_arguments\n", - "--noinstrumenter" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import scorep\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.preprocessing import StandardScaler\n", - "import sys" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "# Example 1: Data Conjunction" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "nrows, ncols = 1000000, 20\n", - "df1, df2, df3, df4 = [pd.DataFrame(np.random.randn(nrows, ncols)) for _ in range(4)]" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000Instrumentation results can be found in /home/visitor/Demonstrators/score-p_kernel/supplementary/example/scorep-20230630_1737_31341656626898" - ] - } - ], - "source": [ - "%%execute_with_scorep\n", - "with scorep.instrumenter.enable():\n", - " # data conjunction\n", - " df5 = df1 + df2 +df3 + df4" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000Instrumentation results can be found in /home/visitor/Demonstrators/score-p_kernel/supplementary/example/scorep-20230630_1737_31353813505508" - ] - } - ], - "source": [ - "%%execute_with_scorep\n", - "with scorep.instrumenter.enable():\n", - " # data conjunction\n", - " df5 = pd.eval(\"df1 + df2 +df3 + df4\", engine='numexpr')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "# Example 2: Deep Learning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "filename = \"fairytales_demo.txt\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# imports\n", - "import scorep\n", - "import logging\n", - "\n", - "logging.basicConfig(\n", - " format=\"%(asctime)s - %(levelname)s - %(name)s - %(message)s\",\n", - " datefmt=\"%d/%m/%Y %H:%M:%S\",\n", - " level=logging.INFO)\n", - "\n", - "from utils import set_seed\n", - "set_seed(42)\n", - "\n", - "import numpy as numpy\n", - "import torch\n", - "import torch.nn as nn\n", - "from torch.nn import functional as F\n", - "\n", - "import math\n", - "from torch.utils.data import Dataset\n", - "\n", - "from model import GPT, GPTconfig\n", - "from trainer import Trainer, TrainerConfig" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# defining the data set\n", - "class CharDataset(Dataset):\n", - " def __init__(self, data, block_size):\n", - " chars = sorted(list(set(data)))\n", - " data_size, vocab_size = len(data), len(chars)\n", - " print(\"data has %d characters, %d unique.\" % (data_size, vocab_size))\n", - "\n", - " self.stoi = {ch:i for i, ch in enumerate(chars)}\n", - " self.itos = {i:ch for i, ch in enumerate(chars)}\n", - " self.block_size = 
block_size\n", - " self.vocab_size = vocab_size\n", - " self.data = data\n", - "\n", - " def __len__(self):\n", - " return len(self.data) - self.block_size\n", - "\n", - " def __getitem__(self, idx):\n", - " chunk = self.data[idx : idx+self.block_size+1]\n", - " dix = [self.stoi[s] for s in chunk]\n", - "\n", - " x = torch.tensor(dix[:-1], dtype = torch.long)\n", - " y = torch.tensor(dix[1:], dtype = torch.long)\n", - " return x, y" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%execute_with_scorep\n", - "\n", - "with scorep.instrumenter.enable():\n", - " block_size = 32\n", - "\n", - " text = open(\"./{}\".format(filename), \"r\").read()\n", - " train_dataset = CharDataset(text, block_size)\n", - "\n", - " \n", - " mconf = GPTconfig(train_dataset.vocab_size, train_dataset.block_size,\n", - " n_layer=4, n_head=4, n_embd=256)\n", - " model = GPT(mconf)\n", - "\n", - " tconf = TrainerConfig(max_epochs=1, batch_size=1024, learning_rate=0.01,\n", - " lr_decay=True, warmup_tokens=512*20, final_tokens=2*len(train_dataset)*block_size,\n", - " num_workers=1)\n", - " trainer = Trainer(model, train_dataset, None, tconf)\n", - "\n", - " torch.cuda.empty_cache()\n", - " trainer.train()\n", - "\n", - " torch.save(model.state_dict(), \"./saved_models/trained_gpt_model\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "![image info](trace_profile.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Try your model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "context = \"The sun shone in the sky.\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%execute_with_scorep\n", - "from utils import sample\n", - "\n", - "with scorep.instrumenter.enable():\n", - " x = torch.tensor([train_dataset.stoi[s] for s in 
context], dtype=torch.long)[None,...].to(trainer.device)\n", - " y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=10)[0]\n", - "\n", - " completion = ''.join([train_dataset.itos[int(i)] for i in y])\n", - " print(completion)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "# Playground\n", - "Feel free to add your code and to analyze it\n", - "\n", - "Begin a cell with:\n", - "%%execute_with_scorep" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "scorep-python", - "language": "python", - "name": "scorep-python" - }, - "language_info": { - "file_extension": ".py", - "mimetype": "text/plain", - "name": "Any text" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/examples/fairytales_demo.txt b/examples/fairytales_demo.txt deleted file mode 100644 index 220f605..0000000 --- a/examples/fairytales_demo.txt +++ /dev/null @@ -1,136 +0,0 @@ -The Three Heads of the Well -LONG before Arthur and the Knights of the Round Table, there reigned in the eastern part of England a king who kept his court at Colchester. -In the midst of all his glory, his queen died, leaving behind her an only daughter, about fifteen years of age who for her beauty and kindness was the wonder of all that knew her. But the king hearing of a lady who had likewise an only daughter, had a mind to marry her for the sake of her riches, though she was old, ugly, hook-nosed, and hump-backed. Her daughter was a yellow dowdy, full of envy and ill-nature; and, in short, was much of the same mould as her mother. But in a few weeks the king, attended by the nobility and gentry, brought his deformed bride to the palace, where the marriage rites were performed. She had not been long in the Court before she set the king against his own beautiful daughter by false reports. 
The young princess having lost her father's love, grew weary of the Court, and one day, meeting with her father in the garden, she begged him, with tears in her eyes, to let her go and seek her fortune; to which the king consented, and ordered her mother-in-law to give her what she pleased. She went to the queen, who gave her a canvas bag of brown bread and hard cheese, with a bottle of beer. Though this was but a pitiful dowry for a king's daughter, she took it, with thanks, and proceeded on her journey, passing through groves, woods, and valleys, till at length she saw an old man sitting on a stone at the mouth of a cave, who said: "Good morrow, fair maiden, whither away so fast?" -"Aged father," says she, "I am going to seek my fortune." -"What have you got in your bag and bottle?" -"In my bag I have got bread and cheese, and in my bottle good small beer. Would you like to have some?" -"Yes " said he, "with all my heart." -With that the lady pulled out the provisions, and bade him eat and welcome. He did so, and gave her many thanks, and said: "There is a thick thorny hedge before you, which you cannot get through, but take this wand in your hand, strike it three times, and say, 'Pray, hedge, let me come through,' and it will open immediately; then, a little further, you will find a well; sit down on the brink of it, and there will come up three golden heads, which will speak; and whatever they require, that do." Promising she would, she took her leave of him. Coming to the hedge and using the old man's wand, it divided, and let her through; then, coming to the well, she had no sooner sat down than a golden head came up singing: -"Wash me and comb me, And lay me down softly. And lay me on a bank to dry, That I may look pretty, When somebody passes by." -"Yes," said she, and taking it in her lap combed it with a silver comb, and then placed it upon a primrose bank. Then up came a second and a third head, -saying the same as the former. 
So she did the same for them, and then, pulling out her provisions, sat down to eat her dinner. -Then said the heads one to another: "What shall we weird for this damsel who has used us so kindly?" -The first said: "I weird her to be so beautiful that she shall charm the most powerful prince in the world" -The second said: "I weird her such a sweet voice as shall far exceed the nightingale." -The third said: "My gift shall be none of the least, as she is a king's daughter, I'll weird her so fortunate that she shall become queen to the greatest prince that reigns." -She then let them down into the well again, and so went on her journey. She had not travelled long before she saw a king hunting in the park with his nobles. She would have avoided him, but the king, having caught a sight of her, approached, and what with her beauty and sweet voice, fell desperately in love with her, and soon induced her to marry him. -This king finding that she was the king of Colchester's daughter, ordered some chariots to be got ready, that he might pay the king, his father-in-law, a visit. The chariot in which the king and queen rode was adorned with rich gems of gold. The king, her father, was at first astonished that his daughter had been so fortunate, till the young king let him know of all that had happened. Great was the joy at Court amongst all, with the exception of the queen and her club-footed daughter, who were ready to burst with envy. The rejoicings, with feasting and dancing continued many days. Then at length they returned home with the dowry her father gave her. -The hump-backed princess, perceiving that her sister had been so lucky in seeking her fortune, wanted to do the same; so she told her mother, and all preparations were made, and she was furnished with rich dresses, and with sugar, almonds, and sweetmeats, in great quantities, and a large bottle of Malaga sack. 
With these she went the same road as her sister; and coming near the cave, the old man said: "Young woman, whither so fast?" -"What's that to you?" said she. -"Then," said he, "what have you in your bag and bottle?" -She answered: "Good things, which you shall not be troubled with." -"Won't you give me some?" said he. -"No, not a bit, nor a drop, unless it would choke you." -The old man frowned, saying: "Evil fortune attend ye!" -Going on, she came to the hedge, through which she espied a gap, and thought to pass through it; but the hedge closed, and the thorns ran into her flesh, so that it was with great difficulty that she got through. Being now all over blood, she searched for water to wash herself, and, looking round she saw the well. She sat down on the brink of it, and one of the heads came up saying: "Wash me, comb me, and lay me down softly," as before, but she banged it with her bottle, saying, "Take that for your washing." So the second and third heads came up, and met with no better treatment than the first. Whereupon the heads consulted among themselves what evils to plague her with for such usage. -The first said "Let her be struck with leprosy in her face." -The second: "Let her voice be as harsh as a corncrake's." -The third said: "Let her have for husband but a poor country cobbler." -Well, on she went till she came to a town, and it being market-day, the people looked at her, and, seeing such an ugly face, and hearing such a squeaky voice, all fled but a poor country cobbler. Now he not long before had mended the shoes of an old hermit, who, having no money, gave him a box of ointment for the cure of the leprosy, and a bottle of spirits for a harsh voice. So the cobbler, having a mind to do an act of charity, was induced to go up to her and ask her who she was. -"I am," said she, "the king of Colchester's daughter-in-law." 
-"Well," said the cobbler, "if I restore you to your natural complexion, and make a sound cure both in face and voice, will you in reward take me for a husband?" -"Yes, friend," replied she, "with all my heart!" -With this the cobbler applied the remedies, and they made her well in a few weeks; after which they were married, and so set forward for the Court at Colchester. When the queen found that her daughter had married nothing but a poor cobbler she hanged herself in wrath. The death of the queenso pleased the king, who was glad to get rid of her so soon, that he gave the cobbler a hundred pounds, to quit the Court with his lady, and take to a remote part of the kingdom, where he lived many years mending shoes, his wife spinning the thread for him. - - -<|endoftext|> - - -Master of all Masters -A GIRL once went to the fair to hire herself for servant. At last a funny-looking old gentleman engaged her, and took her home to his house. When she got there, he told her that he had -He said to her: "What will you call me?" -"Master or mister, or whatever you please sir," says she. -He said: "You must call me 'master of all masters.' And what would you call this?" pointing to his bed. -"Bed or couch, or whatever you please, sir." -"No, that's my 'barnacle.' And what do you call these?" said he pointing to his pantaloons. -"Breeches or trousers, or whatever you please, sir." -"You must call them 'squibs and crackers.' And what would you call her?" pointing to the cat. -"Cat or kit, or whatever you please, sir." -"You must call her 'white-faced simminy.' And this now," showing the fire, "what would you call this?" -"Fire or flame, or whatever you please, sir." -"You must call it 'hot cockalorum,' and what this?" he went on, pointing to the water. -"Water or wet, or whatever you please, sir." -"No, 'pondalorum' is its name. And what do you call all this?" asked he as he pointed to the house. -"House or cottage, or whatever you please, sir." 
-"You must call it 'high topper mountain.'" -That very night the servant woke her master up in a fright and said: "Master of all masters, get out of your barnacle and put on your squibs and crackers. For white-faced simminy has got a spark of hot cockalorum on its tail, and unless you get some pondalorum high topper mountain will be all on hot cockalorum" . . -. . . . . . That's all. - - -<|endoftext|> - - -The Well of the World's End -ONCE upon a time, and a very good time it was, though it wasn't in my time, nor in your time, nor any one else's time, there was a girl whose mother had died, and her father married again. And her stepmother hated her because she was more beautiful than herself, and she was very cruel to her. She used to make her do all the servant's work, and never let her have any peace. At last, one day, the stepmother thought to get rid of her altogether; so she handed her a sieve and said to her: "Go, fill it at the Well of the World's End and bring it home to me full, or woe betide you." For she thought she would never be able to find the Well of the World's End, and, if she did, how could she bring home a sieve full of water? -Well, the girl started off, and asked every one she met to tell her where was the Well of the World's End. But nobody knew, and she didn't know what to do, when a queer little old woman, all bent double, told her where it was, and how she could get to it. So she did what the old woman told her, and at last arrived at the Well of the World's End. But when she dipped the sieve in the cold, cold water, it all ran out again. She tried and she tried again, but every time it was the same; and at last she sate down and cried as if her heart would break. -Suddenly she heard a croaking voice, and she looked up and saw a great frog with goggle eyes looking at her and speaking to her. -"What's the matter, dearie?" it said. 
-"Oh, dear, oh dear," she said, "my stepmother has sent me all this long way to fill this sieve with water from the Well of the World's End, and I can't fill it no how at all." -"Well," said the frog, "if you promise me to do whatever I bid you for a whole night long, I'll tell you how to fill it." -So the girl agreed, and the frog said: -"Stop it with moss and daub it with clay, And then it will carry the water away;" -and then it gave a hop, skip, and jump, and went flop into the Well of the World's End. -So the girl looked about for some moss, and lined the bottom of the sieve with it, and over that she put some clay, and then she dipped it once again into the Well of the World's End; and this time, the water didn't run out, and she turned to go away. -Just then the frog popped up its head out of the Well of the World's End, and said: "Remember your promise." -"All right," said the girl; for thought she, "what harm can a frog do me?" -So she went back to her stepmother, and brought the sieve full of water from the Well of the World's End. The stepmother was angry as angry, but she said nothing at all. -That very evening they heard something tap tapping at the door low down, and a voice cried out: -"Open the door, my hinny, my heart, Open the door, my own darling; Mind you the words that you and I spoke, Down in the meadow, at the World's End Well." -"Whatever can that be?" cried out the stepmother, and the girl had to tell her all about it, and what she had promised the frog. -"Girls must keep their promises," said the stepmother. "Go and open the door this instant." For she was glad the girl would have to obey a nasty frog. -So the girl went and opened the door, and there was the frog from the Well of the World's End. 
And it hopped, and it hopped, and it jumped, till it reached the girl, and then it said: -"Lift me to your knee, my hinny, my heart; Lift me to your knee, my own darling; Remember the words you and I spoke, Down in the meadow by the World's End Well." -But the girl didn't like to, till her stepmother said: "Lift it up this instant, you hussy! Girls must keep their promises!" -THE WELL OF THE WORLD'S END. -"Give me some supper, my hinny, my heart, Give me some supper, my darling; Remember the words you and I spake, In the meadow, by the Well of the World's End." -Well, she didn't mind doing that, so she got it a bowl of milk and bread, and fed it well. And when the frog had finished, it said: -"Go with me to bed, my hinny, my heart, Go with me to bed, my own darling; Mind you the words you spake to me, Down by the cold well, so weary." -But that the girl wouldn't do, till her stepmother said: "Do what you promised, girl; girls must keep their promises. Do what you're bid, or out you go, you and your froggie." -So the girl took the frog with her to bed, and kept it as far away from her as she could. Well, just as the day was beginning to break what should the frog say but: -"Chop off my head, my hinny, my heart, Chop off my head, my own darling; Remember the promise you made to me, Down by the cold well so weary." -At first the girl wouldn't, for she thought of what the frog had done for her at the Well of the World's End. But when the frog said the words over again she went and took an axe and chopped off its head and lo! and behold, there stood before her a handsome young prince, who told her that he had been enchanted by a wicked magician, and he could never be unspelled till some girl would do his bidding for a whole night, and chop off his head at the end of it. 
-The stepmother was surprised indeed when she found the young prince instead of the nasty frog, and she wasn't best pleased, you may be sure, when the prince told her that he was going to marry her stepdaughter because she had unspelled him. But married they were, and went away to live in the castle of the king, his father, and all the stepmother had to console her was, that it was all through her that her stepdaughter was married to a prince. - - -<|endoftext|> - - -Fairy Ointment -DAME GOODY was a nurse that looked after sick people, and minded babies. One night she was woke up at midnight, and when she went downstairs, she saw a strange squinny-eyed, little ugly old fellow, who asked her to come to his wife who was too ill to mind her baby. Dame Goody didn't like the look of the old fellow, but business is business; so she popped on her things, and went down to him. And when she got down to him, he whisked her up on to a large coal-black horse with fiery eyes, that stood at the door; and soon they were going at a rare pace, Dame Goody holding on to the old fellow like grim death. -They rode, and they rode, till at last they stopped before a cottage door. So they got down and went in and found the good woman abed with the children playing about; and the babe, a fine bouncing boy, beside her. -Dame Goody took the babe, which was as fine a baby boy as you'd wish to see. The mother, when she handed the baby to Dame Goody to mind, gave her a box of ointment, and told her to stroke the baby's eyes with it as soon as it opened them. After a while it began to open its eyes. Dame Goody saw that it had squinny eyes just like its father. So she took the box of ointment and stroked its two eyelids with it But she couldn't help wondering what it was for, as she had never seen such a thing done before. So she looked to see if the others were looking, and, when they were not noticing, she stroked her own right eyelid with the ointment. 
-No sooner had she done so, than everything seemed changed about her. The cottage became elegantly furnished. The mother in the bed was a beautiful lady, dressed up in white silk. The little baby was still more beautiful than before, and its clothes were made of a sort of silvery gauze. Its little brothers and sisters around the bed were flat-nosed imps with pointed ears, who made faces at one another, and scratched their polls. Sometimes they would pull the sick lady's ears with their long and hairy paws. In fact, they were up to all kinds of mischief; and Dame Goody knew that she had got into a house of pixies. But she said nothing to nobody, and as soon as the lady was well enough to mind the baby, she asked the old fellow to take her back home. So he came round to the door with the coal-black horse with eyes of fire, and off they went as fast as before, or perhaps a little faster, till they came to Dame Goody's cottage, where the squinny-eyed old fellow lifted her down and left her, thanking her civilly enough, and paying her more than she had ever been paid before for such service. -Now next day happened to be market-day, and as Dame Goody had been away from home, she wanted many things in the house, and trudged off to get them at the market. As she was buying the things she wanted, who should she see but the squinny-eyed old fellow who had taken her on the coal-black horse. And what do you think he was doing? Why he went about from stall to stall taking up things from each, here some fruit, and there some eggs, and so on; and no one seemed to take any notice. -Now Dame Goody did not think it her business to interfere, but she thought she ought not to let so good a customer pass without speaking. So she ups to him and bobs a curtsey and said: "Gooden, sir, I hopes as how your good lady and the little one are as well as—" -But she couldn't finish what she was a-saying, for the funny old fellow started back in surprise, and he says to her, says he: "What! 
do you see me to-day?" -"See you," says she, "why, of course I do, as plain as the sun in the skies, and what's more," says she, "I see you are busy too, into the bargain.""Ah, you see too much," said he; "now, pray, with which eye do you see all this?""With the right eye to be sure," said she, as proud as can be to find him out. -"The ointment! The ointment!" cried the old pixy thief. "Take that for meddlng with what don't concern you: you shall see me no more." And with that he struck her on her right eye, and she couldn't see him any more; and, what was worse, she was blind on the right side from that hour till the day of her death. - - -<|endoftext|> - - -The Ass, the Table, and the Stick -A LAD named Jack was once so unhappy at home through his father's ill-treatment, that he made up his mind to run away and seek his fortune in the wide world. -He ran, and he ran, until he could run no longer, and then he ran right up against a little old woman who was gathering sticks. He was too much out of breath to beg pardon, but the woman was good-natured, and she said he seemed to be a likely lad, so she would take him to be her servant, and would pay him well. He agreed, for he was very hungry, and she brought him to her house in the wood, where he served her for twelvemonths and a day. When the year had passed, she called him to her, and said she had good wages for him. So she presented him with an ass out of the stable, and he had but to pull Neddy's ears to make him begin at once to ee—aw! And when he brayed there dropped from his mouth silver sixpences, and halfcrowns, and golden guineas. -The lad was well pleased with the wage he had received, and away he rode till he reached an inn. There he ordered the best of everything, and when the innkeeper refused to serve him without being paid beforehand, the boy went off to the stable, pulled the ass's ears and obtained his pocket full of money. 
The host had watched all this through a crack in the door, and when night came on he put an ass of his own for the precious Neddy of the poor youth. So Jack, without knowing that any change had been made, rode away next morning to his father's house. -Now, I must tell you that near his home dwelt a poor widow with an only daughter. The lad and the maiden were fast friends and true-loves; but when Jack asked his father's leave to marry the girl, "Never till you have the money to keep her," was the reply. "I have that, father," said the lad, and going to the ass he pulled its long ears; well, he pulled, and he pulled, till one of them came off in his hands; but Neddy, though he hee-hawed and he hee-hawed let fall no halfcrowns or guineas. The father picked up a hayfork and beat his son out of the house. I promise you he ran. Ah! he ran and ran till he came bang against a door, and burst it open, and there he was in a joiner's shop. "You're a likely lad," said the joiner; "serve me for a twelvemonths and a day and I will pay you well." So he agreed, and served the carpenter for a year and a day. "Now," said the master, "I will give you your wage;" and he presented him with a table, telling him he had but to say, "Table, be covered," and at once it would be spread with lots to eat and drink. -Jack hitched the table on his back, and away he went with it till he came to the inn. "Well, host," shouted he, "my dinner to-day, and that of the best." -"Very sorry, but there is nothing in the house but ham and eggs." -"Ham and eggs for me!" exclaimed Jack. "I can do better than that. Come, my table, be covered!" -At once the table was spread with turkey and sausages, roast mutton, potatoes, and greens. The innkeeper opened his eyes, but he said nothing, not he. -That night he fetched down from his attic a table very like that of Jack's, and exchanged the two. Jack, none the wiser, next morning hitched the worthless table on to his back and carried it home." 
Now, father, may I marry my lass?" he asked. -"Not unless you can keep her," replied the father. -"Look here!" exclaimed Jack. "Father, I have a table which does all my bidding." -"Let me see it," said the old man. -The lad set it in the middle of the room, and bade it be covered; but all in vain, the table remained bare. In a rage, the father caught the warming-pan down from the wall and warmed his son's back with it so that the boy fled howling from the house, and ran and ran till he came to a river and tumbled in. A man picked him out and bade him help him in making a bridge over the river; and how do you think he was doing it. Why, by casting a tree across; so Jack climbed up to the top of the tree and threw his weight on it, so that when the man had rooted the tree up, Jack and the tree-head dropped on the farther bank. -"Thank you," said the man; "and now for what you have done I will pay you;" so saying, he tore a branch from the tree, and fettled it up into a club with his knife. "There," exclaimed he; "take this stick, and when you say to it, 'Up stick and bang him,' it will knock any one down who angers you." -The lad was overjoyed to get this stick—so away he went with it to the inn, and as soon as the innkeeper, appeared, "Up stick and bang him!" was his cry. At the word the cudgel flew from his hand and battered the old fellow on the back, rapped his head, bruised his arms, tickled his ribs, till he fell groaning on the floor; still the stick belaboured the prostrate man, nor would Jack call it off till he had got back the stolen ass and table. Then he galloped home on the ass, with the table on his shoulders, and the stick in his hand. When he arrived there he found his father was dead, so he brought his ass into the stable and pulled its ears till he had filled the manger with money. -It was soon known through the town that Jack had returned rolling in wealth, and accordingly all the girls in the place set their caps at him. 
"Now," said Jack, "I shall marry the richest lass in the place; so to-morrow do you all come in front of my house with your money in your aprons." -Next morning the street was full of girls with aprons held out, and gold and silver in them; but Jack's own sweetheart was among them, and she had neither gold nor silver, nought but two copper pennies that was all she had. -"Stand aside, lass," said Jack to her, speaking roughly. "Thou hast no silver nor gold—stand off from the rest." She obeyed, and the tears ran down her cheeks, and filled her apron with diamonds. -"Up stick and bang them!" exclaimed Jack; whereupon the cudgel leaped up, and running along the line of girls, knocked them all on the heads and left them senseless on the pavement. Jack took all their money and poured it into his true-love's lap. "Now, lass," he exclaimed, "thou art the richest, and I shall marry thee." - - -<|endoftext|> - diff --git a/examples/gpt-demo/01-GPT-Training.ipynb b/examples/gpt-demo/01-GPT-Training.ipynb index 625b5a4..597be33 100644 --- a/examples/gpt-demo/01-GPT-Training.ipynb +++ b/examples/gpt-demo/01-GPT-Training.ipynb @@ -17,7 +17,11 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "any text" + } + }, "outputs": [ { "name": "stdout", @@ -28,16 +32,19 @@ } ], "source": [ - "%%scorep_env\n", - "SCOREP_ENABLE_TRACING=1\n", - "SCOREP_ENABLE_PROFILING=0\n", - "SCOREP_TOTAL_MEMORY=3g" + "%env SCOREP_ENABLE_TRACING=1\n", + "%env SCOREP_ENABLE_PROFILING=0\n", + "%env SCOREP_TOTAL_MEMORY=3g" ] }, { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "any text" + } + }, "outputs": [ { "name": "stdout", @@ -55,7 +62,11 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "any text" + } + }, "outputs": [], "source": [ "filename = \"fairytales.txt\"" @@ -64,7 +75,11 @@ { "cell_type": "code", 
"execution_count": 4, - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "any text" + } + }, "outputs": [ { "name": "stdout", @@ -169,14 +184,14 @@ "lastKernelId": null }, "kernelspec": { - "display_name": "scorep-python3", - "language": "python3", - "name": "scorep-python3" + "display_name": "JUmPER", + "language": "python", + "name": "jumper" }, "language_info": { "file_extension": ".py", "mimetype": "text/plain", - "name": "Any text" + "name": "python" } }, "nbformat": 4, diff --git a/src/jumper/kernel.py b/src/jumper/kernel.py index 3f0b2bd..46b72d9 100644 --- a/src/jumper/kernel.py +++ b/src/jumper/kernel.py @@ -5,6 +5,7 @@ import subprocess import sys import time +import shutil from enum import Enum from textwrap import dedent @@ -73,7 +74,6 @@ def __init__(self, **kwargs): self.blacklist_prefixes = ["%lsmagic"] self.scorep_binding_args = [] - self.scorep_env = {} os.environ["SCOREP_KERNEL_PERSISTENCE_DIR"] = "./" self.pershelper = PersHelper("dill", "memory") @@ -96,6 +96,8 @@ def __init__(self, **kwargs): self.perfdata_handler = PerformanceDataHandler() self.nodelist = self.perfdata_handler.get_nodelist() + self.scorep_available_ = shutil.which("scorep") + def cell_output(self, string, stream="stdout"): """ Display string as cell output. 
@@ -111,6 +113,13 @@ def standard_reply(self): "payload": [], "user_expressions": {}, } + + def scorep_not_available(self): + if not self.scorep_available_: + self.cell_output("Score-P not available, cell ignored.", "stderr") + return self.standard_reply() + else: + return None def marshaller_settings(self, code): """ @@ -121,9 +130,9 @@ def marshaller_settings(self, code): self.pershelper.postprocess() marshaller_match = re.search( - r"MARSHALLER=(\w+)", code.split("\n", 1)[1] + r"MARSHALLER=([\w-]+)", code.split("\n", 1)[1] ) - mode_match = re.search(r"MODE=(\w+)", code.split("\n", 1)[1]) + mode_match = re.search(r"MODE=([\w-]+)", code.split("\n", 1)[1]) marshaller = ( marshaller_match.group(1) if marshaller_match else None ) @@ -132,11 +141,10 @@ def marshaller_settings(self, code): if marshaller: if not self.pershelper.set_marshaller(marshaller): self.cell_output( - f"Marshaller '{marshaller}' is not recognized, " + f"Marshaller '{marshaller}' is not available or compatible, " f"kernel will use '{self.pershelper.marshaller}'.", "stderr", ) - return self.standard_reply() if mode: if not self.pershelper.set_mode(mode): self.cell_output( @@ -193,40 +201,18 @@ def set_perfmonitor(self, code): ) return self.standard_reply() - def set_scorep_env(self, code): - """ - Read and record Score-P environment variables from the cell. 
- """ - if self.mode == KernelMode.DEFAULT: - for scorep_param in code.split("\n")[1:]: - if not scorep_param == "": - key, val = scorep_param.split("=") - self.scorep_env[key] = val - self.cell_output( - "Score-P environment set successfully: " + str(self.scorep_env) - ) - elif self.mode == KernelMode.WRITEFILE: - self.writefile_scorep_env += code.split("\n")[1:] - self.cell_output("Environment variables recorded.") - else: - self.cell_output( - f"KernelWarning: Currently in {self.mode}, command ignored.", - "stderr", - ) - return self.standard_reply() - def set_scorep_pythonargs(self, code): """ Read and record Score-P Python binding arguments from the cell. """ if self.mode == KernelMode.DEFAULT: - self.scorep_binding_args += code.split("\n")[1:] + self.scorep_binding_args = code.split("\n")[1].replace(' ', '\n').split("\n") self.cell_output( "Score-P Python binding arguments set successfully: " + str(self.scorep_binding_args) ) elif self.mode == KernelMode.WRITEFILE: - self.writefile_scorep_binding_args += code.split("\n")[1:] + self.writefile_scorep_binding_args = code.split("\n")[1].replace(' ', '\n').split("\n") self.cell_output("Score-P bindings arguments recorded.") else: self.cell_output( @@ -306,6 +292,9 @@ def start_writefile(self, code): # TODO: Edge cases processing, similar to multicellmode if self.mode == KernelMode.DEFAULT: self.mode = KernelMode.WRITEFILE + # init writefile_scorep_env and python binding args + self.writefile_scorep_env = [] + self.writefile_scorep_binding_args = [] writefile_cmd = code.split("\n")[0].split(" ") if len(writefile_cmd) > 1: if writefile_cmd[1].endswith(".py"): @@ -322,30 +311,31 @@ def start_writefile(self, code): os.path.realpath("") + "/" + self.writefile_base_name + ".py" ) - with os.fdopen(os.open(self.writefile_bash_name, os.O_WRONLY | os.O_CREAT), 'w') as bash_script: + with os.fdopen(os.open(self.writefile_bash_name, os.O_WRONLY | os.O_CREAT | os.O_TRUNC), 'w') as bash_script: bash_script.write( dedent( f""" # 
This bash script is generated automatically to run # Jupyter Notebook -> Python script conversion - # by Jumper kernel + # by JUmPER kernel # {self.writefile_python_name} # !/bin/bash """ ) ) - with os.fdopen(os.open(self.writefile_python_name, os.O_WRONLY | os.O_CREAT), 'w') as python_script: + with os.fdopen(os.open(self.writefile_python_name, os.O_WRONLY | os.O_CREAT | os.O_TRUNC), 'w') as python_script: python_script.write( dedent( f""" # This is the automatic conversion of - # Jupyter Notebook -> Python script by Jumper kernel. + # Jupyter Notebook -> Python script by JUmPER kernel. # Code corresponding to the cells not marked for # Score-P instrumentation is framed by # "with scorep.instrumenter.disable() # The script can be run with proper settings using # bash script {self.writefile_bash_name} import scorep + import os """ ) ) @@ -374,7 +364,9 @@ def append_writefile(self, code, explicit_scorep): Append cell to writefile. """ if self.mode == KernelMode.WRITEFILE: - if explicit_scorep or self.writefile_multicell: + if not code: + pass + elif explicit_scorep or self.writefile_multicell: with os.fdopen(os.open(self.writefile_python_name, os.O_WRONLY | os.O_APPEND), 'a') as python_script: python_script.write(code + "\n") self.cell_output( @@ -391,6 +383,11 @@ def append_writefile(self, code, explicit_scorep): self.cell_output( "Python commands without instrumentation recorded." 
) + else: + self.cell_output( + f"KernelWarning: Currently in {self.mode}, command ignored.", + "stderr", + ) return self.standard_reply() def end_writefile(self): @@ -402,7 +399,7 @@ def end_writefile(self): self.mode = KernelMode.DEFAULT with os.fdopen(os.open(self.writefile_bash_name, os.O_WRONLY | os.O_APPEND), 'a') as bash_script: bash_script.write( - f"{' '.join(self.writefile_scorep_env)} " + f"{''.join(self.writefile_scorep_env)}\n" f"{PYTHON_EXECUTABLE} -m scorep " f"{' '.join(self.writefile_scorep_binding_args)} " f"{self.writefile_python_name}" @@ -415,6 +412,31 @@ def end_writefile(self): ) return self.standard_reply() + def abort_writefile(self): + """ + Cancel writefile mode. + """ + if self.mode == KernelMode.WRITEFILE: + self.mode = KernelMode.DEFAULT + + if os.path.exists(self.writefile_bash_name): + os.remove(self.writefile_bash_name) + if os.path.exists(self.writefile_python_name): + os.remove(self.writefile_python_name) + + self.writefile_base_name = "jupyter_to_script" + self.writefile_bash_name = "" + self.writefile_python_name = "" + self.writefile_scorep_binding_args = [] + self.writefile_multicell = False + self.cell_output("Writefile mode aborted.") + else: + self.cell_output( + f"KernelWarning: Currently in {self.mode}, command ignored.", + "stderr", + ) + return self.standard_reply() + def ghost_cell_error(self, reply_status, error_message): self.shell.execution_count += 1 reply_status["execution_count"] = self.shell.execution_count - 1 @@ -612,7 +634,6 @@ async def scorep_execute( code, code_for_history, silent, - store_history=True, user_expressions=None, allow_stdin=False, *, @@ -652,6 +673,7 @@ async def scorep_execute( allow_stdin=allow_stdin, cell_id=cell_id, ) + if reply_status_dump["status"] != "ok": self.ghost_cell_error( reply_status_dump, @@ -665,13 +687,12 @@ async def scorep_execute( + self.scorep_binding_args + [scorep_script_name] ) - proc_env = self.scorep_env.copy() - proc_env.update({'PATH': os.environ.get('PATH', ''), 
- 'LD_LIBRARY_PATH': - os.environ.get('LD_LIBRARY_PATH', ''), - 'PYTHONPATH': - os.environ.get('PYTHONPATH', ''), - 'PYTHONUNBUFFERED': 'x'}) + scorep_env = {key: os.environ[key] for key in os.environ if key.startswith('SCOREP_')} + proc_env = {'PATH': os.environ.get('PATH', ''), + 'LD_LIBRARY_PATH': os.environ.get('LD_LIBRARY_PATH', ''), + 'PYTHONPATH': os.environ.get('PYTHONPATH', ''), + 'PYTHONUNBUFFERED': 'x'} + proc_env.update(scorep_env) # scorep path, subprocess observation # determine datetime for figuring out scorep path after execution @@ -823,8 +844,8 @@ async def scorep_execute( # Determine directory to which trace files were saved by Score-P scorep_folder = "" - if "SCOREP_EXPERIMENT_DIRECTORY" in self.scorep_env: - scorep_folder = self.scorep_env["SCOREP_EXPERIMENT_DIRECTORY"] + if "SCOREP_EXPERIMENT_DIRECTORY" in os.environ: + scorep_folder = os.environ["SCOREP_EXPERIMENT_DIRECTORY"] self.cell_output( f"Instrumentation results can be found in {scorep_folder}" ) @@ -867,16 +888,9 @@ async def scorep_execute( code_for_history, time_indices) return self.standard_reply() - async def do_execute( - self, - code, - silent, - store_history=False, - user_expressions=None, - allow_stdin=False, - *, - cell_id=None, - ): + async def do_execute(self, code, silent, store_history=False, + user_expressions=None, allow_stdin=False, *, + cell_id=None, **kwargs): """ Override of do_execute() method of IPythonKernel. If no custom magic commands specified, execute cell with super().do_execute(), @@ -1073,10 +1087,9 @@ async def do_execute( return self.standard_reply() elif code.startswith("%%set_perfmonitor"): return self.set_perfmonitor(code) - elif code.startswith("%%scorep_env"): - return self.set_scorep_env(code) elif code.startswith("%%scorep_python_binding_arguments"): - return self.set_scorep_pythonargs(code) + return (self.scorep_not_available() or + self.set_scorep_pythonargs(code)) elif code.startswith("%%serializer_settings"): self.cell_output( "Deprecated. 
Use: %%marshalling_settings\n[MARSHALLER=]\n[" @@ -1085,74 +1098,83 @@ async def do_execute( ) return self.standard_reply() elif code.startswith("%%marshalling_settings"): - return self.marshaller_settings(code) + return (self.scorep_not_available() or + self.marshaller_settings(code)) elif code.startswith("%%enable_multicellmode"): - return self.enable_multicellmode() + return self.scorep_not_available() or self.enable_multicellmode() elif code.startswith("%%abort_multicellmode"): - return self.abort_multicellmode() + return self.scorep_not_available() or self.abort_multicellmode() elif code.startswith("%%finalize_multicellmode"): # Cannot be put into a separate function due to tight coupling # between do_execute and scorep_execute - if self.mode == KernelMode.MULTICELL: - self.mode = KernelMode.DEFAULT - try: - # second multicell_code should be cleaned for code history - reply_status = await self.scorep_execute( - self.multicell_code, - self.multicell_code_history, + if not self.scorep_available_: + self.cell_output("Score-P not available, cell ignored.", + "stderr") + return self.standard_reply() + else: + if self.mode == KernelMode.MULTICELL: + self.mode = KernelMode.DEFAULT + try: + reply_status = await self.scorep_execute( + self.multicell_code, + silent, + store_history, + user_expressions, + allow_stdin, + cell_id=cell_id, + ) + except Exception: + self.cell_output( + "KernelError: Multicell execution failed.", + "stderr" + ) + return self.standard_reply() + self.multicell_code = "" + self.multicell_cellcount = 0 + return reply_status + elif self.mode == KernelMode.WRITEFILE: + self.writefile_multicell = False + return self.standard_reply() + else: + self.cell_output( + f"KernelWarning: Currently in {self.mode}, ignore command", + "stderr", + ) + return self.standard_reply() + elif code.startswith("%%start_writefile"): + return self.scorep_not_available() or self.start_writefile(code) + elif code.startswith("%%abort_writefile"): + return 
self.scorep_not_available() or self.abort_writefile() + elif code.startswith("%%end_writefile"): + return self.scorep_not_available() or self.end_writefile() + elif code.startswith("%%execute_with_scorep"): + if not self.scorep_available_: + self.cell_output("Score-P not available, cell ignored.", "stderr") + return self.standard_reply() + else: + if self.mode == KernelMode.DEFAULT: + return await self.scorep_execute( + code.split("\n", 1)[1], silent, store_history, user_expressions, allow_stdin, cell_id=cell_id, ) - except Exception: - self.cell_output( - "KernelError: Multicell execution failed.", "stderr" + elif self.mode == KernelMode.MULTICELL: + return self.append_multicellmode( + magics_cleanup(code.split("\n", 1)[1])[1] + ) + elif self.mode == KernelMode.WRITEFILE: + scorep_env, nomagic_code = magics_cleanup(code.split("\n", 1)[1]) + self.writefile_scorep_env.extend(scorep_env) + return self.append_writefile( + nomagic_code, + explicit_scorep=True, ) - return self.standard_reply() - self.multicell_code = "import time\n" - self.multicell_code_history = "" - self.multicell_cellcount = -1 - return reply_status - elif self.mode == KernelMode.WRITEFILE: - self.writefile_multicell = False - return self.standard_reply() - else: - self.cell_output( - f"KernelWarning: Currently in {self.mode}, ignore command", - "stderr", - ) - return self.standard_reply() - elif code.startswith("%%start_writefile"): - return self.start_writefile(code) - elif code.startswith("%%end_writefile"): - return self.end_writefile() - elif code.startswith("%%execute_with_scorep"): - if self.mode == KernelMode.DEFAULT: - # second code argument is for history purposes, we want to keep - # everything - return await self.scorep_execute( - code.split("\n", 1)[1], - code, - silent, - store_history, - user_expressions, - allow_stdin, - cell_id=cell_id, - ) - elif self.mode == KernelMode.MULTICELL: - return self.append_multicellmode( - magics_cleanup(code.split("\n", 1)[1]) - ) - elif self.mode == 
KernelMode.WRITEFILE: - return self.append_writefile( - magics_cleanup(code.split("\n", 1)[1]), - explicit_scorep=True, - ) else: if self.mode == KernelMode.DEFAULT: - self.pershelper.parse(magics_cleanup(code), "jupyter") + self.pershelper.parse(magics_cleanup(code)[1], "jupyter") self.perfdata_handler.start_perfmonitor(os.getpid()) parent_ret = await super().do_execute( code, @@ -1171,10 +1193,13 @@ async def do_execute( code) return parent_ret elif self.mode == KernelMode.MULTICELL: - return self.append_multicellmode(magics_cleanup(code)) + return self.append_multicellmode(magics_cleanup(code)[1]) elif self.mode == KernelMode.WRITEFILE: + scorep_env, nomagic_code = magics_cleanup(code) + self.writefile_scorep_env.extend(scorep_env) return self.append_writefile( - magics_cleanup(code), explicit_scorep=False + nomagic_code, + explicit_scorep=False, ) def do_shutdown(self, restart): diff --git a/src/jumper/userpersistence.py b/src/jumper/userpersistence.py index a356f12..ee9cd4c 100644 --- a/src/jumper/userpersistence.py +++ b/src/jumper/userpersistence.py @@ -4,8 +4,7 @@ import astunparse from pathlib import Path import uuid - -import dill +import importlib scorep_script_name = "scorep_script.py" @@ -83,12 +82,13 @@ def postprocess(self): os.remove(scorep_script_name) def set_marshaller(self, marshaller): - # TODO: valid marshallers should not be configured in code but via an - # environment variable - valid_marshallers = {"dill", "cloudpickle", "parallel_marshall"} - return marshaller in valid_marshallers and ( - setattr(self, "marshaller", marshaller) or True - ) + try: + marshaller_module = importlib.import_module(marshaller) + except ImportError: + return False + if not hasattr(marshaller_module, 'dump') or not hasattr(marshaller_module, 'load'): + return False + return (setattr(self, "marshaller", marshaller) or True) def set_mode(self, mode): valid_modes = {"disk", "memory"} @@ -99,6 +99,7 @@ def jupyter_dump(self): Generate code for kernel ghost cell to 
dump notebook persistence for subprocess. """ + jupyter_dump_ = ( "import sys\n" "import os\n" @@ -109,7 +110,7 @@ def jupyter_dump(self): f"'{self.paths['jupyter']['sys_path']}',{self.marshaller})\n" f"dump_variables({str(self.jupyter_variables)},globals()," f"'{self.paths['jupyter']['var']}'," - f"{self.marshaller})" + f"{self.marshaller})\n" ) return jupyter_dump_ @@ -209,14 +210,14 @@ def dump_runtime( filtered_os_environ_ = { k: v for k, v in os_environ_.items() - if not k.startswith("SCOREP_PYTHON_BINDINGS_") + if not k.startswith("SCOREP_") } with os.fdopen(os.open(os_environ_dump_, os.O_WRONLY | os.O_CREAT), 'wb') as file: - dill.dump(filtered_os_environ_, file) + marshaller.dump(filtered_os_environ_, file) with os.fdopen(os.open(sys_path_dump_, os.O_WRONLY | os.O_CREAT), 'wb') as file: - dill.dump(sys_path_, file) + marshaller.dump(sys_path_, file) def dump_variables(variables_names, globals_, var_dump_, marshaller): @@ -235,7 +236,6 @@ def dump_variables(variables_names, globals_, var_dump_, marshaller): with os.fdopen(os.open(var_dump_, os.O_WRONLY | os.O_CREAT), 'wb') as file: marshaller.dump(user_variables, file) - def load_runtime( os_environ_, sys_path_, os_environ_dump_, sys_path_dump_, marshaller ): @@ -243,10 +243,10 @@ def load_runtime( loaded_sys_path_ = [] with os.fdopen(os.open(os_environ_dump_, os.O_RDONLY), 'rb') as file: - loaded_os_environ_ = dill.load(file) + loaded_os_environ_ = marshaller.load(file) with os.fdopen(os.open(sys_path_dump_, os.O_RDONLY), 'rb') as file: - loaded_sys_path_ = dill.load(file) + loaded_sys_path_ = marshaller.load(file) # os_environ_.clear() os_environ_.update(loaded_os_environ_) @@ -327,6 +327,25 @@ def magics_cleanup(code): Remove IPython magics from the code. Return only "persistent" code, which is executed with whitelisted magics. 
""" + lines = code.splitlines(True) + scorep_env = [] + for i, line in enumerate(lines): + if line.startswith("%env"): + env_var = line.strip().split(' ', 1)[1] + if '=' in env_var: + # Assign environment variable value + if env_var.startswith('SCOREP'): + # For writefile mode, extract SCOREP env vars separately + scorep_env.append('export ' + env_var + '\n') + else: + key, val = env_var.split('=', 1) + lines[i] = f'os.environ["{key}"]="{val}"\n' + else: + # Print environment variable value + key = env_var + lines[i] = f'print("env: {key}=os.environ[\'{key}\']")\n' + code = ''.join(lines) + whitelist_prefixes_cell = ["%%prun", "%%capture"] whitelist_prefixes_line = ["%prun", "%time"] @@ -343,4 +362,4 @@ def magics_cleanup(code): tuple(whitelist_prefixes_line) ): # Line magic & executed cell, remove first word nomagic_code = code.split(" ", 1)[1] - return nomagic_code + return scorep_env, nomagic_code \ No newline at end of file diff --git a/tests/kernel/notebook.ipynb b/tests/kernel/notebook.ipynb index d0cee68..a335321 100644 --- a/tests/kernel/notebook.ipynb +++ b/tests/kernel/notebook.ipynb @@ -16,16 +16,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "Score-P environment set successfully: {'SCOREP_ENABLE_TRACING': '1', 'SCOREP_ENABLE_PROFILING': '0', 'SCOREP_TOTAL_MEMORY': '3g', 'SCOREP_EXPERIMENT_DIRECTORY': 'test_kernel_tmp/scorep-traces'}" + "env: SCOREP_ENABLE_TRACING=1\n", + "env: SCOREP_ENABLE_PROFILING=0\n", + "env: SCOREP_TOTAL_MEMORY=3g\n", + "env: SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces\n" ] } ], "source": [ - "%%scorep_env\n", - "SCOREP_ENABLE_TRACING=1\n", - "SCOREP_ENABLE_PROFILING=0\n", - "SCOREP_TOTAL_MEMORY=3g\n", - "SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces" + "%env SCOREP_ENABLE_TRACING=1\n", + "%env SCOREP_ENABLE_PROFILING=0\n", + "%env SCOREP_TOTAL_MEMORY=3g\n", + "%env SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces" ] }, { @@ -122,14 +124,14 @@ "name": "stdout", "output_type": 
"stream", "text": [ - "Serializer set to 'dill', mode set to 'memory'." + "Kernel uses 'dill' marshaller in 'memory' mode." ] } ], "source": [ - "%%serializer_settings\n", - "dill\n", - "memory" + "%%marshalling_settings\n", + "MARSHALLER=dill\n", + "MODE=memory" ] }, { @@ -162,14 +164,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Serializer set to 'dill', mode set to 'disk'." + "Kernel uses 'dill' marshaller in 'disk' mode." ] } ], "source": [ - "%%serializer_settings\n", - "dill\n", - "disk" + "%%marshalling_settings\n", + "MARSHALLER=dill\n", + "MODE=disk" ] }, { @@ -202,14 +204,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Serializer set to 'cloudpickle', mode set to 'memory'." + "Kernel uses 'cloudpickle' marshaller in 'memory' mode." ] } ], "source": [ - "%%serializer_settings\n", - "cloudpickle\n", - "memory" + "%%marshalling_settings\n", + "MARSHALLER=cloudpickle\n", + "MODE=memory" ] }, { @@ -242,14 +244,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Serializer set to 'cloudpickle', mode set to 'disk'." + "Kernel uses 'cloudpickle' marshaller in 'disk' mode." ] } ], "source": [ - "%%serializer_settings\n", - "cloudpickle\n", - "disk" + "%%marshalling_settings\n", + "MARSHALLER=cloudpickle\n", + "MODE=disk" ] }, { @@ -420,9 +422,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'/new/subprocess/path' found in sys.path\n" + ] + } + ], "source": [ "if '/new/subprocess/path' in sys.path:\n", " print(\"'/new/subprocess/path' found in sys.path\")" @@ -437,45 +447,85 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multicell mode enabled. The following cells will be marked for instrumented execution." 
+ ] + } + ], "source": [ "%%enable_multicellmode" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cell marked for multicell mode. It will be executed at position 1" + ] + } + ], "source": [ "c = np.sum(c_mtx)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multicell mode aborted." + ] + } + ], "source": [ "%%abort_multicellmode" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multicell mode enabled. The following cells will be marked for instrumented execution." + ] + } + ], "source": [ "%%enable_multicellmode" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cell marked for multicell mode. It will be executed at position 1" + ] + } + ], "source": [ "with scorep.instrumenter.enable():\n", " c = np.sum(c_mtx)\n", @@ -484,9 +534,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cell marked for multicell mode. 
It will be executed at position 2" + ] + } + ], "source": [ "print('c =', c)\n", "print('Sum(c_vec) =', c_vec.sum())" @@ -494,9 +552,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000Executing cell 1\n", + "with scorep.instrumenter.enable():\n", + " c = np.sum(c_mtx)\n", + "c_vec = np.arange(b, c)\n", + "----------------------------------\n", + "\n", + "\n", + "Executing cell 2\n", + "print('c =', c)\n", + "print('Sum(c_vec) =', c_vec.sum())\n", + "----------------------------------\n", + "c = 350\n", + "Sum(c_vec) = 61030\n", + "\n", + "\n", + "Instrumentation results can be found in test_kernel_tmp/scorep-traces" + ] + } + ], "source": [ "%%finalize_multicellmode" ] @@ -510,31 +591,56 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Started converting to Python script. See files:\n", + "/home/carthage/py_work/hotfix/test_kernel_tmp/my_jupyter_to_script_run.sh\n", + "/home/carthage/py_work/hotfix/test_kernel_tmp/my_jupyter_to_script.py\n" + ] + } + ], "source": [ "%%start_writefile test_kernel_tmp/my_jupyter_to_script" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python commands without instrumentation recorded." 
+ ] + } + ], "source": [ - "%%scorep_env\n", - "SCOREP_ENABLE_TRACING=1\n", - "SCOREP_ENABLE_PROFILING=0\n", - "SCOREP_TOTAL_MEMORY=3g\n", - "SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces" + "%env SCOREP_ENABLE_TRACING=1\n", + "%env SCOREP_ENABLE_PROFILING=0\n", + "%env SCOREP_TOTAL_MEMORY=3g\n", + "%env SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Score-P bindings arguments recorded." + ] + } + ], "source": [ "%%scorep_python_binding_arguments\n", "--noinstrumenter" @@ -542,9 +648,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python commands without instrumentation recorded." + ] + } + ], "source": [ "import numpy as np\n", "a = 5\n", @@ -556,9 +670,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python commands with instrumentation recorded." + ] + } + ], "source": [ "%%execute_with_scorep\n", "import scorep\n", @@ -569,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -578,18 +700,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python commands with instrumentation recorded." 
+ ] + } + ], "source": [ "c = np.sum(c_mtx)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python commands with instrumentation recorded." + ] + } + ], "source": [ "with scorep.instrumenter.enable():\n", " c = np.sum(c_mtx)\n", @@ -598,9 +736,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python commands with instrumentation recorded." + ] + } + ], "source": [ "print('c =', c)\n", "print('Sum(c_vec) =', c_vec.sum())" @@ -608,7 +754,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -617,9 +763,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished converting to Python script." 
+ ] + } + ], "source": [ "%%end_writefile" ] @@ -628,19 +782,37 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a + b = 15\n", + "a - b = -5\n", + "c = 350\n", + "Sum(c_vec) = 61030\n" + ] + } + ], "source": [ "%%bash\n", - "chmod u+x test_kernel_tmp/my_jupyter_to_script_run.sh\n", + "chmod u+x ./test_kernel_tmp/my_jupyter_to_script_run.sh\n", "./test_kernel_tmp/my_jupyter_to_script_run.sh" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "scorep-python", + "display_name": "JUmPER", "language": "python", - "name": "scorep-python" + "name": "jumper" }, "language_info": { "file_extension": ".py", diff --git a/tests/kernel/persistence.yaml b/tests/kernel/persistence.yaml index f720699..3d2e459 100644 --- a/tests/kernel/persistence.yaml +++ b/tests/kernel/persistence.yaml @@ -1,8 +1,7 @@ - - |- - import os - os.environ['JUPYTER_VAR'] = 'JUPYTER' - - - "" + %env JUPYTER_VAR=JUPYTER + - - "env: JUPYTER_VAR=JUPYTER\n" - - |- import numpy as np @@ -64,8 +63,8 @@ 4 36 Name: a*b, dtype: int64 - - - "print('SUBPROCESS_VAR =', os.environ['SUBPROCESS_VAR'])" - - - "SUBPROCESS_VAR = SUBPROCESS\n" + - "%env SUBPROCESS_VAR" + - - "'SUBPROCESS'" - - |- if '/new/subprocess/path' in sys.path: diff --git a/tests/kernel/scorep_env.yaml b/tests/kernel/scorep_env.yaml index 859dc1d..f7af4d3 100644 --- a/tests/kernel/scorep_env.yaml +++ b/tests/kernel/scorep_env.yaml @@ -1,9 +1,11 @@ - - |- - %%scorep_env - SCOREP_ENABLE_TRACING=1 - SCOREP_ENABLE_PROFILING=0 - SCOREP_TOTAL_MEMORY=3g - SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces - - - "Score-P environment set successfully: {'SCOREP_ENABLE_TRACING': '1', 'SCOREP_ENABLE_PROFILING': '0', - 'SCOREP_TOTAL_MEMORY': '3g', 'SCOREP_EXPERIMENT_DIRECTORY': 'test_kernel_tmp/scorep-traces'}" \ No newline at end 
of file + %env SCOREP_ENABLE_TRACING=1 + %env SCOREP_ENABLE_PROFILING=0 + %env SCOREP_TOTAL_MEMORY=3g + %env SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces + - - | + env: SCOREP_ENABLE_TRACING=1 + env: SCOREP_ENABLE_PROFILING=0 + env: SCOREP_TOTAL_MEMORY=3g + env: SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces diff --git a/tests/kernel/writemode.yaml b/tests/kernel/writemode.yaml index 471283c..d1a805f 100644 --- a/tests/kernel/writemode.yaml +++ b/tests/kernel/writemode.yaml @@ -1,17 +1,32 @@ - - - "%%start_writefile test_kernel_tmp/my_jupyter_to_script" + - "%%start_writefile" - - | Started converting to Python script. See files: /home/runner/work/scorep_jupyter_kernel_python/scorep_jupyter_kernel_python/test_kernel_tmp/my_jupyter_to_script_run.sh /home/runner/work/scorep_jupyter_kernel_python/scorep_jupyter_kernel_python/test_kernel_tmp/my_jupyter_to_script.py - - |- - %%scorep_env - SCOREP_ENABLE_TRACING=1 - SCOREP_ENABLE_PROFILING=0 - SCOREP_TOTAL_MEMORY=3g - SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces - - - "Environment variables recorded." + %env SCOREP_ENABLE_TRACING=1 + %env SCOREP_ENABLE_PROFILING=0 + %env SCOREP_TOTAL_MEMORY=3g + %env SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces + - - "Python commands without instrumentation recorded." +- + - "%%abort_writefile" + - - "Writefile mode aborted." +- + - "%%start_writefile test_kernel_tmp/my_jupyter_to_script" + - - | + Started converting to Python script. See files: + /home/carthage/Documents/scorep_jupyter_kernel_python/test_kernel_tmp/my_jupyter_to_script_run.sh + /home/carthage/Documents/scorep_jupyter_kernel_python/test_kernel_tmp/my_jupyter_to_script.py +- + - |- + %env SCOREP_ENABLE_TRACING=1 + %env SCOREP_ENABLE_PROFILING=0 + %env SCOREP_TOTAL_MEMORY=3g + %env SCOREP_EXPERIMENT_DIRECTORY=test_kernel_tmp/scorep-traces + - - "Python commands without instrumentation recorded." 
- - |- %%scorep_python_binding_arguments diff --git a/tests/test_kernel.py b/tests/test_kernel.py index 430002b..f2b2fd8 100644 --- a/tests/test_kernel.py +++ b/tests/test_kernel.py @@ -28,9 +28,14 @@ def check_stream_output(self, code, expected_output, stream="stdout"): self.flush_channels() reply, output_msgs = self.execute_helper(code=code) for msg, expected_msg in zip(output_msgs, expected_output): - self.assertEqual(msg["header"]["msg_type"], "stream") - self.assertEqual(msg["content"]["name"], stream) - self.assertEqual(msg["content"]["text"], expected_msg) + #self.assertEqual(msg["header"]["msg_type"], "stream") + # some messages can be of type 'execute_result' type instead of stdout + # self.assertEqual(msg["content"]["name"], stream) + if msg["header"]["msg_type"] == "stream": + self.assertEqual(msg["content"]["name"], stream) + self.assertEqual(msg["content"]["text"], expected_msg) + elif msg["header"]["msg_type"] == "execute_result": + self.assertEqual(msg["content"]["data"]["text/plain"], expected_msg) def check_from_file(self, filename): with open(filename, "r") as file: From 19527cab7fea437cb0ef5b4c817eef1516e6e212 Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Wed, 20 Nov 2024 16:57:44 +0100 Subject: [PATCH 06/13] add ipympl to dependencies --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bee4a19..6511b6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,13 +21,14 @@ classifiers=[ dependencies = [ "ipykernel", + "ipywidgets", + "ipympl", "jupyter-client", "astunparse", "dill", "itables", "matplotlib", "pandas", - "ipywidgets", "pynvml" # we need that only for Nvidia GPU systems ] From 928e76e0e5295d3de566ac3d90f0f8c3ad500554 Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Wed, 20 Nov 2024 17:35:03 +0100 Subject: [PATCH 07/13] check also for scorep python bindings in scorep availability --- src/jumper/kernel.py | 23 ++++++++++++++++------- 1 file changed, 16 
insertions(+), 7 deletions(-) diff --git a/src/jumper/kernel.py b/src/jumper/kernel.py index abfd1e0..d7ef4b8 100644 --- a/src/jumper/kernel.py +++ b/src/jumper/kernel.py @@ -97,6 +97,11 @@ def __init__(self, **kwargs): self.nodelist = self.perfdata_handler.get_nodelist() self.scorep_available_ = shutil.which("scorep") + self.scorep_python_available_ = True + try: + import scorep + except ModuleNotFoundError: + self.scorep_python_available_ = False def cell_output(self, string, stream="stdout"): """ @@ -116,7 +121,13 @@ def standard_reply(self): def scorep_not_available(self): if not self.scorep_available_: - self.cell_output("Score-P not available, cell ignored.", "stderr") + self.cell_output("Score-P not available, cell ignored.", + "stderr") + return self.standard_reply() + if not self.scorep_python_available_: + self.cell_output("Score-P Python not available, cell ignored. " + "Consider installing it via `pip install scorep`", + "stderr") return self.standard_reply() else: return None @@ -1188,12 +1199,8 @@ async def do_execute(self, code, silent, store_history=False, elif code.startswith("%%end_writefile"): return self.scorep_not_available() or self.end_writefile() elif code.startswith("%%execute_with_scorep"): - if not self.scorep_available_: - self.cell_output( - "Score-P not available, cell ignored.", "stderr" - ) - return self.standard_reply() - else: + scorep_missing = self.scorep_not_available() + if scorep_missing is None: if self.mode == KernelMode.DEFAULT: return await self.scorep_execute( code.split("\n", 1)[1], @@ -1216,6 +1223,8 @@ async def do_execute(self, code, silent, store_history=False, nomagic_code, explicit_scorep=True, ) + else: + return scorep_missing else: if self.mode == KernelMode.DEFAULT: self.pershelper.parse(magics_cleanup(code)[1], "jupyter") From 87607e51a97423347080f8b5e4d89600f8106161 Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Wed, 20 Nov 2024 18:04:05 +0100 Subject: [PATCH 08/13] fix linter, improve scorep python check 
--- src/jumper/kernel.py | 115 +++++++++++++++++++++--------------- src/jumper/visualization.py | 41 +++++++++---- 2 files changed, 97 insertions(+), 59 deletions(-) diff --git a/src/jumper/kernel.py b/src/jumper/kernel.py index d7ef4b8..898b804 100644 --- a/src/jumper/kernel.py +++ b/src/jumper/kernel.py @@ -15,7 +15,7 @@ from itables import show from jumper.userpersistence import PersHelper, scorep_script_name from jumper.userpersistence import magics_cleanup - +import importlib from jumper.perfdatahandler import PerformanceDataHandler import jumper.visualization as perfvis @@ -99,7 +99,7 @@ def __init__(self, **kwargs): self.scorep_available_ = shutil.which("scorep") self.scorep_python_available_ = True try: - import scorep + importlib.import_module("scorep") except ModuleNotFoundError: self.scorep_python_available_ = False @@ -121,13 +121,14 @@ def standard_reply(self): def scorep_not_available(self): if not self.scorep_available_: - self.cell_output("Score-P not available, cell ignored.", - "stderr") + self.cell_output("Score-P not available, cell ignored.", "stderr") return self.standard_reply() if not self.scorep_python_available_: - self.cell_output("Score-P Python not available, cell ignored. " - "Consider installing it via `pip install scorep`", - "stderr") + self.cell_output( + "Score-P Python not available, cell ignored. 
" + "Consider installing it via `pip install scorep`", + "stderr", + ) return self.standard_reply() else: return None @@ -268,7 +269,7 @@ def append_multicellmode(self, code): f"print('Executing cell {self.multicell_cellcount}')\n" + f"print('''{code}''')\n" + f"print('-' * {max_line_len})\n" - + f"print('MCM_TS'+str(time.time()))\n" + + "print('MCM_TS'+str(time.time()))\n" + f"{code}\n" + "print('''\n''')\n" ) @@ -814,17 +815,21 @@ async def scorep_execute( # retrieve the index this cell will have in the global history sub_idx = len(self.perfdata_handler.get_code_history()) # append to have end of last code fragment - multicellmode_timestamps.append("MCM_TS"+str(time.time())) + multicellmode_timestamps.append("MCM_TS" + str(time.time())) time_indices = [[]] nb_ms = 0.0 for idx, ts_string in enumerate(multicellmode_timestamps[:-1]): - secs = (float(multicellmode_timestamps[idx+1][6:]) - - float(ts_string[6:])) - nb_ms += (secs / - int(os.environ.get("JUMPER_REPORT_FREQUENCY", 2))) + secs = float(multicellmode_timestamps[idx + 1][6:]) - float( + ts_string[6:] + ) + nb_ms += secs / int( + os.environ.get("JUMPER_REPORT_FREQUENCY", 2) + ) if nb_ms >= 1.0: # only consider if we have measurements - time_indices[0].append((str(sub_idx)+"_"+str(idx), nb_ms)) + time_indices[0].append( + (str(sub_idx) + "_" + str(idx), nb_ms) + ) nb_ms %= 1.0 # add time for last to last measurement if nb_ms >= 0.0: @@ -932,13 +937,22 @@ async def scorep_execute( self.pershelper.postprocess() if performance_data_nodes: self.report_perfdata(performance_data_nodes, duration) - self.perfdata_handler.append_code(datetime.datetime.now(), - code_for_history, time_indices) + self.perfdata_handler.append_code( + datetime.datetime.now(), code_for_history, time_indices + ) return self.standard_reply() - async def do_execute(self, code, silent, store_history=False, - user_expressions=None, allow_stdin=False, *, - cell_id=None, **kwargs): + async def do_execute( + self, + code, + silent, + 
store_history=False, + user_expressions=None, + allow_stdin=False, + *, + cell_id=None, + **kwargs, + ): """ Override of do_execute() method of IPythonKernel. If no custom magic commands specified, execute cell with super().do_execute(), @@ -972,20 +986,20 @@ async def do_execute(self, code, silent, store_history=False, """ if code.startswith("%%display_graph_for_last"): if not len(self.perfdata_handler.get_perfdata_history()): - self.cell_output( - "No performance data available." - ) + self.cell_output("No performance data available.") time_indices = self.perfdata_handler.get_time_indices()[-1] if time_indices: sub_idxs = [x[0] for x in time_indices[0]] - self.cell_output(f"Cell seemed to be tracked in multi cell" - " mode. Got performance data for the" - f" following sub cells: {sub_idxs}") + self.cell_output( + f"Cell seemed to be tracked in multi cell" + " mode. Got performance data for the" + f" following sub cells: {sub_idxs}" + ) perfvis.draw_performance_graph( self.nodelist, self.perfdata_handler.get_perfdata_history()[-1], self.gpu_avail, - time_indices + time_indices, ) return self.standard_reply() elif code.startswith("%%display_graph_for_index"): @@ -1005,14 +1019,16 @@ async def do_execute(self, code, silent, store_history=False, time_indices = self.perfdata_handler.get_time_indices()[index] if time_indices: sub_idxs = [x[0] for x in time_indices[0]] - self.cell_output(f"Cell seemed to be tracked in multi cell" - " mode. Got performance data for the" - f" following sub cells: {sub_idxs}") + self.cell_output( + f"Cell seemed to be tracked in multi cell" + " mode. 
Got performance data for the" + f" following sub cells: {sub_idxs}" + ) perfvis.draw_performance_graph( self.nodelist, self.perfdata_handler.get_perfdata_history()[index], self.gpu_avail, - time_indices + time_indices, ) return self.standard_reply() elif code.startswith("%%display_graph_for_all"): @@ -1057,8 +1073,9 @@ async def do_execute(self, code, silent, store_history=False, pd.DataFrame( self.perfdata_handler.get_code_history(), columns=["timestamp", "code"], - ).reset_index(), layout={"topStart": "search", "topEnd": None}, - columnDefs=[{"className": 'dt-left', "targets": 2}], + ).reset_index(), + layout={"topStart": "search", "topEnd": None}, + columnDefs=[{"className": "dt-left", "targets": 2}], ) return self.standard_reply() elif code.startswith("%%perfdata_to_variable"): @@ -1078,15 +1095,18 @@ async def do_execute(self, code, silent, store_history=False, # measurements, e.g. (2_0, 5), (2_1, 3), (2_2, 7) mcm_time_indices = self.perfdata_handler.get_time_indices() mcm_time_indices = list( - filter(lambda item: item is not None, mcm_time_indices)) + filter(lambda item: item is not None, mcm_time_indices) + ) - code = (f"{varname}=" - f"{self.perfdata_handler.get_perfdata_history()}") + code = ( + f"{varname}=" + f"{self.perfdata_handler.get_perfdata_history()}" + ) if mcm_time_indices: code += f"\n{varname}.append({mcm_time_indices})" - await super().do_execute(code,silent=True) + await super().do_execute(code, silent=True) self.cell_output( "Exported performance data to " + str(varname) @@ -1097,10 +1117,10 @@ async def do_execute(self, code, silent, store_history=False, self.cell_output( "Detected that cells were executed in multi cell mode." + f"Last entry in {varname} is a list that contains " - f"the sub indices per cell that were executed in " - f"in multi cell mode and a counter for the number of" - f" performance measurements within this sub cell, " - f"e.g. 
f{mcm_time_indices[-1]}", + f"the sub indices per cell that were executed in " + f"in multi cell mode and a counter for the number of" + f" performance measurements within this sub cell, " + f"e.g. f{mcm_time_indices[-1]}", "stdout", ) return self.standard_reply() @@ -1137,8 +1157,9 @@ async def do_execute(self, code, silent, store_history=False, elif code.startswith("%%set_perfmonitor"): return self.set_perfmonitor(code) elif code.startswith("%%scorep_python_binding_arguments"): - return (self.scorep_not_available() or - self.set_scorep_pythonargs(code)) + return self.scorep_not_available() or self.set_scorep_pythonargs( + code + ) elif code.startswith("%%serializer_settings"): self.cell_output( "Deprecated. Use: %%marshalling_settings" @@ -1147,8 +1168,9 @@ async def do_execute(self, code, silent, store_history=False, ) return self.standard_reply() elif code.startswith("%%marshalling_settings"): - return (self.scorep_not_available() or - self.marshaller_settings(code)) + return self.scorep_not_available() or self.marshaller_settings( + code + ) elif code.startswith("%%enable_multicellmode"): return self.scorep_not_available() or self.enable_multicellmode() elif code.startswith("%%abort_multicellmode"): @@ -1242,8 +1264,9 @@ async def do_execute(self, code, silent, store_history=False, ) if performance_data_nodes: self.report_perfdata(performance_data_nodes, duration) - self.perfdata_handler.append_code(datetime.datetime.now(), - code) + self.perfdata_handler.append_code( + datetime.datetime.now(), code + ) return parent_ret elif self.mode == KernelMode.MULTICELL: return self.append_multicellmode(magics_cleanup(code)[1]) diff --git a/src/jumper/visualization.py b/src/jumper/visualization.py index dcff8b0..c0ef635 100644 --- a/src/jumper/visualization.py +++ b/src/jumper/visualization.py @@ -150,8 +150,8 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None): # colorization of the plot in case of multiple cells if time_indices: - # in multi node 
case, we have to iterate over the indices (time_indices) - # and not only 0 here + # in multi node case, we have to iterate over the indices ( + # time_indices) and not only 0 here current_index = 0 target_index = -1 transition_offset = (x_scale[1] - x_scale[0]) / 2 @@ -165,21 +165,36 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None): # don't use offset for last cell if sub_idx == last_idx: transition_offset = 0 - ax.axvspan(x_scale[current_index] + start_offset, - x_scale[target_index] + - transition_offset, - facecolor=color[cell_idx], alpha=0.3) + ax.axvspan( + x_scale[current_index] + start_offset, + x_scale[target_index] + transition_offset, + facecolor=color[cell_idx], + alpha=0.3, + ) - text_x_pos = x_scale[current_index] + start_offset + ( - (x_scale[target_index] + transition_offset - - x_scale[current_index] + start_offset) / 2) + text_x_pos = ( + x_scale[current_index] + + start_offset + + ( + ( + x_scale[target_index] + + transition_offset + - x_scale[current_index] + + start_offset + ) + / 2 + ) + ) text_y_pos = ax.get_ylim()[0] + (ax.get_ylim()[1] * 0.05) # add cell index to plot - ax.text(text_x_pos, text_y_pos, "#" + str(sub_idx), style='italic', - bbox={ - 'facecolor': 'lightgrey', 'alpha': 0.5, 'pad': 2} - ) + ax.text( + text_x_pos, + text_y_pos, + "#" + str(sub_idx), + style="italic", + bbox={"facecolor": "lightgrey", "alpha": 0.5, "pad": 2}, + ) current_index = target_index start_offset = transition_offset From 78ad091dc389530c8cbfc79308b6f297db73605f Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Wed, 20 Nov 2024 19:15:12 +0100 Subject: [PATCH 09/13] not sure if tests fixed --- tests/kernel/multicell.yaml | 10 +++++----- tests/test_kernel.py | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/kernel/multicell.yaml b/tests/kernel/multicell.yaml index a0e4720..bda0392 100644 --- a/tests/kernel/multicell.yaml +++ b/tests/kernel/multicell.yaml @@ -3,7 +3,7 @@ - - "Multicell mode enabled. 
The following cells will be marked for instrumented execution." - - "c = np.sum(c_mtx)" - - - "Cell marked for multicell mode. It will be executed at position 1" + - - "Cell marked for multicell mode. It will be executed at position 0" - - "%%abort_multicellmode" - - "Multicell mode aborted." @@ -15,23 +15,23 @@ with scorep.instrumenter.enable(): c = np.sum(c_mtx) c_vec = np.arange(b, c) - - - "Cell marked for multicell mode. It will be executed at position 1" + - - "Cell marked for multicell mode. It will be executed at position 0" - - |- print('c =', c) print('Sum(c_vec) =', c_vec.sum()) - - - "Cell marked for multicell mode. It will be executed at position 2" + - - "Cell marked for multicell mode. It will be executed at position 1" - - "%%finalize_multicellmode" - - "\0" - - "Executing cell 1\n" + - "Executing cell 0\n" - "with scorep.instrumenter.enable():\n" - " c = np.sum(c_mtx)\n" - "c_vec = np.arange(b, c)\n" - "----------------------------------\n" - "\n" - "\n" - - "Executing cell 2\n" + - "Executing cell 1\n" - "print('c =', c)\n" - "print('Sum(c_vec) =', c_vec.sum())\n" - "----------------------------------\n" diff --git a/tests/test_kernel.py b/tests/test_kernel.py index 7bd0584..abec581 100644 --- a/tests/test_kernel.py +++ b/tests/test_kernel.py @@ -34,6 +34,7 @@ def check_stream_output(self, code, expected_output, stream="stdout"): # some messages can be of type 'execute_result' # type instead of stdout # self.assertEqual(msg["content"]["name"], stream) + if msg["header"]["msg_type"] == "stream": self.assertEqual(msg["content"]["name"], stream) self.assertEqual(msg["content"]["text"], expected_msg) @@ -42,6 +43,7 @@ def check_stream_output(self, code, expected_output, stream="stdout"): msg["content"]["data"]["text/plain"], expected_msg ) + def check_from_file(self, filename): with open(filename, "r") as file: From 174732b58b3607c1b090b06a96c77fe3de5af7e0 Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Thu, 21 Nov 2024 05:49:03 +0100 Subject: 
[PATCH 10/13] fix multicellmode for very short cells --- src/jumper/kernel.py | 76 +++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/src/jumper/kernel.py b/src/jumper/kernel.py index 898b804..fe3bfb6 100644 --- a/src/jumper/kernel.py +++ b/src/jumper/kernel.py @@ -674,7 +674,6 @@ def report_perfdata(self, performance_data_nodes, duration): async def scorep_execute( self, code, - code_for_history, silent, user_expressions=None, allow_stdin=False, @@ -702,7 +701,6 @@ async def scorep_execute( os.open(scorep_script_name, os.O_WRONLY | os.O_CREAT), "w" ) as file: file.write(self.pershelper.subprocess_wrapper(code)) - # For disk mode use implicit synchronization between kernel and # subprocess: await jupyter_dump, subprocess.wait(), # await jupyter_update Ghost cell - dump current Jupyter session for @@ -752,6 +750,7 @@ async def scorep_execute( proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=proc_env ) + self.perfdata_handler.start_perfmonitor(proc.pid) # For memory mode jupyter_dump and jupyter_update must be awaited # concurrently to the running subprocess @@ -810,6 +809,7 @@ async def scorep_execute( # explicit timestamps, but aligns the colorization of the plot based # on the number of perf measurements we have, which is individual per # node + time_indices = None if len(multicellmode_timestamps): # retrieve the index this cell will have in the global history @@ -832,9 +832,15 @@ async def scorep_execute( ) nb_ms %= 1.0 # add time for last to last measurement + if nb_ms >= 0.0: - sub_idx, val = time_indices[0][-1] - time_indices[0][-1] = (sub_idx, val + nb_ms) + if len(time_indices[0]): + sub_idx, val = time_indices[0][-1] + time_indices[0][-1] = (sub_idx, val + nb_ms) + else: + time_indices[0].append( + (str(sub_idx) + "_" + str(0), nb_ms) + ) nb_ms = 0.0 for idx, val in enumerate(time_indices[0]): @@ -903,7 +909,7 @@ async def scorep_execute( f"Instrumentation results 
can be found in {scorep_folder}" ) else: - # Find last creasted directory with scorep* name + # Find last created directory with scorep* name # TODO: Directory isn't created local when running scorep-collector max_iterations = 5 while max_iterations > 0: @@ -938,7 +944,7 @@ async def scorep_execute( if performance_data_nodes: self.report_perfdata(performance_data_nodes, duration) self.perfdata_handler.append_code( - datetime.datetime.now(), code_for_history, time_indices + datetime.datetime.now(), code, time_indices ) return self.standard_reply() @@ -1178,42 +1184,35 @@ async def do_execute( elif code.startswith("%%finalize_multicellmode"): # Cannot be put into a separate function due to tight coupling # between do_execute and scorep_execute - if not self.scorep_available_: - self.cell_output( - "Score-P not available, cell ignored.", "stderr" - ) - return self.standard_reply() - else: - if self.mode == KernelMode.MULTICELL: - self.mode = KernelMode.DEFAULT - try: - reply_status = await self.scorep_execute( - self.multicell_code, - silent, - store_history, - user_expressions, - allow_stdin, - cell_id=cell_id, - ) - except Exception: - self.cell_output( - "KernelError: Multicell execution failed.", - "stderr", - ) - return self.standard_reply() - self.multicell_code = "" - self.multicell_cellcount = 0 - return reply_status - elif self.mode == KernelMode.WRITEFILE: - self.writefile_multicell = False - return self.standard_reply() - else: + if self.mode == KernelMode.MULTICELL: + self.mode = KernelMode.DEFAULT + try: + reply_status = await self.scorep_execute( + self.multicell_code, + silent, + user_expressions, + allow_stdin, + cell_id=cell_id, + ) + except Exception as e: self.cell_output( - f"KernelWarning: Currently in {self.mode}," - f" ignore command", + "KernelError: Multicell execution failed.", "stderr", ) return self.standard_reply() + self.multicell_code = "" + self.multicell_cellcount = -1 + return reply_status + elif self.mode == KernelMode.WRITEFILE: + 
self.writefile_multicell = False + return self.standard_reply() + else: + self.cell_output( + f"KernelWarning: Currently in {self.mode}," + f" ignore command", + "stderr", + ) + return self.standard_reply() elif code.startswith("%%start_writefile"): return self.scorep_not_available() or self.start_writefile(code) elif code.startswith("%%abort_writefile"): @@ -1227,7 +1226,6 @@ async def do_execute( return await self.scorep_execute( code.split("\n", 1)[1], silent, - store_history, user_expressions, allow_stdin, cell_id=cell_id, From cbb27698c41f108d26eab10c7c9a607bfaf7da6d Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Thu, 21 Nov 2024 06:04:47 +0100 Subject: [PATCH 11/13] make flake8 happy --- src/jumper/kernel.py | 2 +- src/jumper/userpersistence.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/jumper/kernel.py b/src/jumper/kernel.py index fe3bfb6..fc16b8e 100644 --- a/src/jumper/kernel.py +++ b/src/jumper/kernel.py @@ -1194,7 +1194,7 @@ async def do_execute( allow_stdin, cell_id=cell_id, ) - except Exception as e: + except Exception: self.cell_output( "KernelError: Multicell execution failed.", "stderr", diff --git a/src/jumper/userpersistence.py b/src/jumper/userpersistence.py index 07022c1..c8f1695 100644 --- a/src/jumper/userpersistence.py +++ b/src/jumper/userpersistence.py @@ -240,6 +240,7 @@ def dump_variables(variables_names, globals_, var_dump_, marshaller): with os.fdopen(os.open(var_dump_, os.O_WRONLY | os.O_CREAT), "wb") as file: marshaller.dump(user_variables, file) + def load_runtime( os_environ_, sys_path_, os_environ_dump_, sys_path_dump_, marshaller ): From 612b4660525d788b5011bae6ac3c2d237b47568b Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Thu, 21 Nov 2024 17:29:03 +0100 Subject: [PATCH 12/13] fix visualization --- src/jumper/visualization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jumper/visualization.py b/src/jumper/visualization.py index c0ef635..01bdc6d 100644 --- 
a/src/jumper/visualization.py +++ b/src/jumper/visualization.py @@ -153,7 +153,7 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None): # in multi node case, we have to iterate over the indices ( # time_indices) and not only 0 here current_index = 0 - target_index = -1 + target_index = 0 transition_offset = (x_scale[1] - x_scale[0]) / 2 start_offset = 0 last_idx = time_indices[0][-1][0] @@ -167,7 +167,7 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None): transition_offset = 0 ax.axvspan( x_scale[current_index] + start_offset, - x_scale[target_index] + transition_offset, + x_scale[min(target_index, len(x_scale)-1)] + transition_offset, facecolor=color[cell_idx], alpha=0.3, ) @@ -177,7 +177,7 @@ def plot_graph(ax, metric, perfdata, time_indices=None, color=None): + start_offset + ( ( - x_scale[target_index] + x_scale[min(target_index, len(x_scale)-1)] + transition_offset - x_scale[current_index] + start_offset From 17e11ef4c9eadf426a61a0382c3e4fb6a79bca85 Mon Sep 17 00:00:00 2001 From: Elias Werner Date: Thu, 28 Nov 2024 16:55:20 +0100 Subject: [PATCH 13/13] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6511b6d..cbd21e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta:__legacy__" [project] name='jumper-kernel' -version='1.0.0' +version='1.1.0' authors=[ {name='Elias Werner',email='elias.werner@tu-dresden.de'}, ]