From 33dfd2fe8b5a27c59bb216e5fa43325166f82457 Mon Sep 17 00:00:00 2001
From: Joana Niermann
Date: Tue, 7 Jan 2025 19:25:28 +0100
Subject: [PATCH] Add CPU vs GPU comparison plots per algebra-plugin

---
 tests/tools/python/propagation_benchmarks.py | 139 +++++++++++++------
 1 file changed, 96 insertions(+), 43 deletions(-)

diff --git a/tests/tools/python/propagation_benchmarks.py b/tests/tools/python/propagation_benchmarks.py
index 53926c421..fde313626 100644
--- a/tests/tools/python/propagation_benchmarks.py
+++ b/tests/tools/python/propagation_benchmarks.py
@@ -35,11 +35,14 @@
 # Convert benchmark timings to 'ms'
 unit_conversion = {"ns": 10**-6, "um": 10**-3, "ms": 1, "s": 10**3}
 
-# Known processor types
-proc_types = ["cpu", "cuda", "sycl"]
+# Known hardware backend types
+bknd_types = ["cpu", "cuda", "sycl"]
 
 # Patterns to be removed from processor names for simplicity
-proc_patterns = ["CPU", "(TM)", "GHz", "@"]
+bknd_patterns = ["CPU", "(TM)", "GHz", "@"]
+
+# Plot types for benchmarks
+benchmark_plots = namedtuple("benchmark_plots", "latency throughput")
 
 
 def __main__():
@@ -148,8 +151,10 @@ def __main__():
         if file_extension != ".json":
             logging.error("Wrong file extension. Should be '.json': " + format_msg)
             sys.exit(1)
-        if not any(p in file_name for p in proc_types):
-            logging.error("No processor type found (cpu|cuda|sycl): " + format_msg)
+        if not any(p in file_name for p in bknd_types):
+            logging.error(
+                "No hardware backend type found (cpu|cuda|sycl): " + format_msg
+            )
             sys.exit(1)
         if not any(p in file_name for p in algebra_plugins):
             logging.error("No algebra-plugin name found: " + format_msg)
@@ -158,7 +163,7 @@ def __main__():
             input_data_files.append(file)
 
     # Gather and check benchmark executables and resulting data files for every
-    # processor type and algebra plugin
+    # hardware backend type and algebra plugin
     benchmark_files = namedtuple("benchmark_files", "bin data_files")
     benchmarks = {"cpu": benchmark_files([], [])}
     if args.cuda:
@@ -167,10 +172,10 @@ def __main__():
         # benchmarks["sycl"] = benchmark_files([], [])
         logging.error("SYCL propagation benchmark is not implemented")
 
-    for proc, files in benchmarks.items():
+    for bknd, files in benchmarks.items():
         for plugin in algebra_plugins:
-            binary = f"{bindir}/detray_propagation_benchmark_{proc}_{plugin}"
-            data_file = f"{det_name}_benchmark_data_{proc}_{plugin}.json"
+            binary = f"{bindir}/detray_propagation_benchmark_{bknd}_{plugin}"
+            data_file = f"{det_name}_benchmark_data_{bknd}_{plugin}.json"
 
             # If the results should not be read from file, run the benchmark
             if data_file not in (os.path.basename(f) for f in input_data_files):
@@ -243,21 +248,25 @@ def __main__():
         args_list = args_list + ["--material_file", args.material_file]
 
     # Run the benchmarks
-    for proc, files in benchmarks.items():
+    for bknd, files in benchmarks.items():
         if args.cuda or args.sycl:
             # Try to get the GPU name
-            gpu = "Unknown"
-            # gpu = str(subprocess.check_output(["nvidia-smi", "-L"]))
+            try:
+                gpu = subprocess.check_output(["nvidia-smi", "-L"]).decode().strip()
+            except (OSError, subprocess.CalledProcessError):
+                # nvidia-smi is not installed or failed to run
+                gpu = "Unknown"
+
             benchmark_options.append(f"--benchmark_context=GPU={gpu}")
 
         for binary in files.bin:
-            plugin = binary.split(f"benchmark_{proc}_")[-1]
+            plugin = binary.split(f"benchmark_{bknd}_")[-1]
 
             subprocess.run(
                 [
                     binary,
                     f"--benchmark_context=Plugin={plugin}",
-                    f"--benchmark_out=./{det_name}_benchmark_data_{proc}_{plugin}.json",
+                    f"--benchmark_out=./{det_name}_benchmark_data_{bknd}_{plugin}.json",
                 ]
                 + benchmark_options
                 + args_list
@@ -268,14 +277,14 @@
     logging.info("Generating plots...\n")
     plot_factory = plt_factory(out_dir, logging)
-    marker_styles = itertools.cycle(["o", "x", "*", "v", "s", "^", "<", ">"])
+    marker_styles = ["o", "x", "*", "v", "s", "^", "<", ">"]
 
     # Read the benchmark data and prepare it for plotting
-    def __prep_data(logging, input_dir, file):
+    def __prep_data(file):
         # Read the data part into a pandas frame
         context, data = read_benchmark_data(input_dir, logging, file)
 
-        # If the cpu data could not be loaded, quit
-        if proc == "cpu" and (context is None or data is None):
+        # If the data could not be loaded, quit
+        if context is None or data is None:
             logging.warning(f"Failed to read data in file: {file}")
             sys.exit(1)
 
@@ -293,7 +302,7 @@ def __prep_data(logging, input_dir, file):
         return context, data
 
     # Simpler processor tag
-    def __compactify_proc_name(name, patterns=proc_patterns):
+    def __compactify_bknd_name(name, patterns=bknd_patterns):
         out = ""
         for sub_string in name.split(" "):
             if any(p in sub_string for p in patterns):
@@ -303,48 +312,43 @@ def __compactify_proc_name(name, patterns=proc_patterns):
 
         return out
 
-    benchmark_plots = namedtuple("benchmark_plots", "latency throughput")
-    plot_dict = {}
-
-    for proc, benchmark_data in benchmarks.items():
-
-        # Go through all benchmark data files for this processor type
-        for i, data_file in enumerate(benchmark_data.data_files):
-            file_stem, extension = os.path.splitext(data_file)
-            plugin = file_stem.split(f"{det_name}_benchmark_data_{proc}_")[-1]
+    # Plot the data of all benchmark files given in 'data_files'
+    def __generate_plots(file_list, label_list, title, plot_series_name):
+        # Save the different plots per hardware backend
+        plots = benchmark_plots(None, None)
+        marker_style_cycle = itertools.cycle(marker_styles)
 
+        # Go through all benchmark data files for this hardware backend type
+        for i, file in enumerate(file_list):
             # Benchmark results for the next algebra plugin
-            context, data = __prep_data(logging, input_dir, data_file)
-            marker = next(marker_styles)
+            context, data = __prep_data(file)
+            marker = next(marker_style_cycle)
 
             # Initialize plots
             if i == 0:
-                proc_name = __compactify_proc_name(
-                    context["CPU" if proc == "cpu" else "GPU"]
-                )
-
                 # Plot the data against the number of tracks
                 latency_plot = benchmark_plotter.plot_benchmark(
                     context=context,
                     df=data,
                     plot_factory=plot_factory,
-                    label=f"{plugin}",
+                    label=label_list[i],
                     data_type="real_time",
                     marker=marker,
-                    title=proc_name,
+                    title=title,
                 )
                 throughput_plot = benchmark_plotter.plot_benchmark(
                     context=context,
                     df=data,
                     plot_factory=plot_factory,
-                    label=f"{plugin}",
+                    label=label_list[i],
                     data_type="TracksPropagated",
                     marker=marker,
-                    title=proc_name,
+                    title=title,
                 )
 
-                plot_dict[proc] = benchmark_plots(latency_plot, throughput_plot)
+                plots = benchmark_plots(latency_plot, throughput_plot)
 
             # Add new data to plots
             else:
@@ -352,29 +356,78 @@ def __compactify_proc_name(name, patterns=proc_patterns):
                     context=context,
                     df=data,
                     plot_factory=plot_factory,
-                    label=f"{plugin}",
+                    label=label_list[i],
                     data_type="real_time",
                     marker=marker,
-                    plot=plot_dict[proc].latency,
+                    plot=plots.latency,
                 )
                 benchmark_plotter.plot_benchmark(
                     context=context,
                     df=data,
                     plot_factory=plot_factory,
-                    label=f"{plugin}",
+                    label=label_list[i],
                     data_type="TracksPropagated",
                     marker=marker,
-                    plot=plot_dict[proc].throughput,
+                    plot=plots.throughput,
                 )
 
         # Write to disk
         plot_factory.write_plot(
-            plot_dict[proc].latency, f"{det_name}_prop_latency_{proc}", out_format
+            plots.latency, f"{det_name}_{plot_series_name}_latency", out_format
         )
 
         plot_factory.write_plot(
-            plot_dict[proc].throughput, f"{det_name}_prop_throughput_{proc}", out_format
+            plots.throughput, f"{det_name}_{plot_series_name}_throughput", out_format
         )
 
+    # Plot all data files per hardware backend (different algebra-plugins)
+    for bknd, benchmark_data in benchmarks.items():
+        # Peek into the benchmark context to get the name of the backend
+        context, _ = read_benchmark_data(
+            input_dir, logging, benchmark_data.data_files[0]
+        )
+        bknd_name = __compactify_bknd_name(
+            context["CPU" if bknd == "cpu" else "GPU"]
+        )
+
+        # Generate plot labels
+        plot_labels = []
+        for file in benchmark_data.data_files:
+            # Get hardware backend and algebra-plugin from file name
+            file_stem, _ = os.path.splitext(file)
+            plugin = file_stem.split(f"{det_name}_benchmark_data_{bknd}_")[-1]
+            plot_labels.append(plugin)
+
+        __generate_plots(
+            benchmark_data.data_files,
+            plot_labels,
+            f"hardware backend: {bknd.upper()} ({bknd_name})",
+            f"prop_benchmark_algebra-plugin_comparison_{bknd}",
+        )
+
+    # Plot results for different hardware backends using the same algebra plugin
+    for plugin in algebra_plugins:
+        data_files_per_plugin = []
+        plot_labels = []
+        for bknd, benchmark_data in benchmarks.items():
+            # Peek into the benchmark context to get the name of the backend
+            context, _ = read_benchmark_data(
+                input_dir, logging, benchmark_data.data_files[0]
+            )
+            bknd_name = __compactify_bknd_name(
+                context["CPU" if bknd == "cpu" else "GPU"]
+            )
+            for data_file in benchmark_data.data_files:
+                if plugin in data_file:
+                    data_files_per_plugin.append(data_file)
+                    plot_labels.append(f"{bknd}: {bknd_name}")
+
+        __generate_plots(
+            data_files_per_plugin,
+            plot_labels,
+            f"algebra-plugin: {plugin}",
+            f"prop_benchmark_backend_comparison_{plugin}",
+        )
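---

Note (review commentary, not part of the patch): both new plot series are driven
purely by the `{det_name}_benchmark_data_{bknd}_{plugin}.json` file naming
convention. Below is a minimal, self-contained sketch of the grouping logic;
the detector name "toy_detector" and the plugin names "array"/"eigen" are
hypothetical placeholders, not values taken from this patch:

    import os
    from collections import namedtuple

    # Hypothetical inputs that follow the patch's naming convention
    det_name = "toy_detector"
    algebra_plugins = ["array", "eigen"]
    benchmark_files = namedtuple("benchmark_files", "bin data_files")
    benchmarks = {
        bknd: benchmark_files(
            [], [f"{det_name}_benchmark_data_{bknd}_{p}.json" for p in algebra_plugins]
        )
        for bknd in ["cpu", "cuda"]
    }

    # Series 1: one plot per hardware backend, one curve per algebra-plugin
    for bknd, data in benchmarks.items():
        labels = [
            os.path.splitext(f)[0].split(f"{det_name}_benchmark_data_{bknd}_")[-1]
            for f in data.data_files
        ]
        print(f"{bknd}: curves = {labels}")  # e.g. cpu: curves = ['array', 'eigen']

    # Series 2: one plot per algebra-plugin, one curve per backend
    for plugin in algebra_plugins:
        files = [f for d in benchmarks.values() for f in d.data_files if plugin in f]
        print(f"{plugin}: curves from = {files}")

One design caveat the sketch makes visible: the per-plugin grouping matches by
substring (`if plugin in data_file`), so plugin names that are substrings of one
another would end up in the same plot.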