awslabs · markjschreiber · Aug 19, 2024 · Aug 16, 2024 · Aug 16, 2024 · Aug 19, 2024
diff --git a/README.md b/README.md
@@ -384,6 +384,16 @@ The CSV output by the command above includes the following columns:
 > [!WARNING]  
 > At this time AWS HealthOmics does not report the average or maximum storage used by runs that use "DYNAMIC" storage that run for under two hours. Because of this limitation the `storageMaximumGiB` and `storageAverageGiB` are set to zero and will not be included in the estimate run cost.
 
+#### Produce a timeline plot for a run
+
+The RunAnalyzer tool can produce an interative timeline plot of a workflow. The plots allow you to visualize how individual tasks ran over the course of the run.
+
+```bash
+python -m omics.cli.run_analyzer -P plots/ 7113639
+```
+
+![Example time line image showing stacked horizontal bars indicating the time taken by each task](./assets/timeline.png)
+
 #### Output workflow run manifest in JSON format
 
 ```bash

diff --git a/assets/timeline.png b/assets/timeline.png
diff --git a/omics/cli/run_analyzer/__main__.py b/omics/cli/run_analyzer/__main__.py
@@ -10,6 +10,7 @@
                           [--timeline]
                           [--file=<path>]
                           [--out=<path>]
+                          [--plot=<directory>]
                           [--help]
 
 Options:
@@ -20,6 +21,7 @@
  -T, --timeline           Show workflow run timeline
  -f, --file=<path>        Load input from file
  -o, --out=<path>         Write output to file
+ -P, --plot=<directory>   Plot a run timeline to a directory
  -h, --help               Show help text
 
 Examples:
@@ -42,7 +44,10 @@
 
 import boto3
 import dateutil
+import dateutil.utils
 import docopt
+import timeline  # type: ignore
+from bokeh.plotting import output_file
 
 exename = os.path.basename(sys.argv[0])
 OMICS_LOG_GROUP = "/aws/omics/WorkflowLog"
@@ -493,3 +498,32 @@ def tocsv(val):
                 writer.writerow(row)
         if opts["--out"]:
             sys.stderr.write(f"{exename}: wrote {opts['--out']}\n")
+    if opts["--plot"]:
+        if len(resources) < 1:
+            die("no resources to plot")
+
+        run = {}
+        for res in resources:
+            rtype = re.split(r"[:/]", res["arn"])[-2]
+            if rtype == "run":
+                run = res
+                resources.remove(res)  # we don't want the run in the data to plot
+                break
+
+        start = datetime.datetime.strptime(run["startTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
+        stop = datetime.datetime.strptime(run["stopTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
+        run_duration_hrs = (stop - start).total_seconds() / 3600
+
+        runid = run["arn"].split("/")[-1]
+        output_file_basename = f"{runid}_timeline"
+
+        # open or create the plot directory
+        plot_dir = opts["--plot"]
+        if not os.path.isdir(plot_dir):
+            os.makedirs(plot_dir)
+        output_file(
+            filename=os.path.join(plot_dir, f"{output_file_basename}.html"), title=runid, mode="cdn"
+        )
+        title = f"arn: {run['arn']}, name: {run.get('name')}"
+
+        timeline.plot_timeline(resources, title=title, max_duration_hrs=run_duration_hrs)
diff --git a/omics/cli/run_analyzer/timeline.py b/omics/cli/run_analyzer/timeline.py
@@ -0,0 +1,139 @@
+import sys
+from datetime import datetime
+
+import pandas as pd  # type: ignore
+from bokeh.layouts import column
+from bokeh.models import ColumnDataSource, Div, Range1d
+from bokeh.plotting import figure, show
+
+TIME_SCALE_FACTORS = {"sec": 1, "min": 1 / 60, "hr": 1 / 3600, "day": 1 / 86400}
+
+TASK_COLORS = {"COMPLETED": "cornflowerblue", "FAILED": "crimson", "CANCELLED": "orange"}
+
+
+def _parse_time_str(time_str):
+    # if time_str is actually a datetime just return it
+    if isinstance(time_str, datetime):
+        return time_str
+
+    try:
+        return datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S.%fZ")
+    except ValueError:
+        return datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S%fZ")
+
+
+def _get_task_timings_data(tasks, time_units="min"):
+    time_scale_factor = TIME_SCALE_FACTORS[time_units]
+
+    # remove tasks that don't have a creationTime
+    filtered_tasks = [task for task in tasks if task.get("creationTime")]
+    if not filtered_tasks:
+        print("No tasks found with timing data, a plot cannot be created", file=sys.stderr)
+        return pd.DataFrame()
+
+    tare = min([_parse_time_str(task["creationTime"]) for task in filtered_tasks])
+
+    for i, task in enumerate(tasks):
+        if "creationTime" not in task:
+            task["creationTime"] = tare
+            task["startTime"] = task["creationTime"]
+            task["stopTime"] = task["creationTime"]
+        else:
+            task["creationTime"] = _parse_time_str(task["creationTime"])
+            task["startTime"] = _parse_time_str(task["startTime"])
+            task["stopTime"] = _parse_time_str(task["stopTime"])
+        task["cpus"] = task.get("cpus", 0)
+        task["gpus"] = task.get("gpus", 0)
+        task["memory"] = task.get("memory", 0)
+        task["instanceType"] = task.get("instanceType", "N/A")
+
+        task["y"] = i
+        task["color"] = TASK_COLORS[task["status"]]
+
+        task["running_left"] = (task["startTime"] - tare).total_seconds() * time_scale_factor
+        task["running_right"] = (task["stopTime"] - tare).total_seconds() * time_scale_factor
+        task["running_duration"] = task["running_right"] - task["running_left"]
+
+        task["queued_left"] = (task["creationTime"] - tare).total_seconds() * time_scale_factor
+        task["queued_right"] = task["running_left"]
+        task["queued_duration"] = task["queued_right"] - task["queued_left"]
+
+        task["label"] = f"({task['arn']}) {task['name']}"
+        task["text_x"] = (task["stopTime"] - tare).total_seconds() + 30 * time_scale_factor
+
+        tasks[i] = task
+        task["estimatedUSD"] = task["metrics"].get("estimatedUSD", 0.0)
+
+    return pd.DataFrame.from_records(tasks).sort_values("creationTime")
+
+
+def plot_timeline(tasks, title="", time_units="min", max_duration_hrs=5, show_plot=True):
+    """Plot a time line figure for supplied tasks"""
+    time_scale_factor = TIME_SCALE_FACTORS[time_units]
+    data = _get_task_timings_data(tasks, time_units=time_units)
+
+    source = ColumnDataSource(data)
+
+    tooltips = [
+        ("taskId", "@arn"),
+        ("name", "@name"),
+        ("cpus", "@cpus"),
+        ("gpus", "@gpus"),
+        ("memory", "@memory GiB"),
+        ("instanceType", "@instanceType"),
+        ("queued", f"@queued_duration {time_units}"),
+        ("duration", f"@running_duration {time_units}"),
+        ("status", "@status"),
+        ("est. cost USD", "@estimatedUSD"),
+    ]
+
+    p_run = figure(width=960, height=800, sizing_mode="stretch_both", tooltips=tooltips)
+    p_run.hbar(
+        y="y",
+        left="queued_left",
+        right="queued_right",
+        height=0.8,
+        color="lightgrey",
+        source=source,
+        legend_label="queued",
+    )
+    p_run.hbar(
+        y="y",
+        left="running_left",
+        right="running_right",
+        height=0.8,
+        color="color",
+        source=source,
+        legend_label="running",
+    )
+    if len(data) < 50:
+        p_run.text(
+            x="text_x",
+            y="y",
+            text="name",
+            alpha=0.4,
+            text_baseline="middle",
+            text_font_size="1.5ex",
+            source=source,
+        )
+    x_max = max_duration_hrs * 3600 * time_scale_factor  # max expected workflow duration in hours
+    x_min = -(x_max * 0.05)
+    p_run.x_range = Range1d(x_min, x_max)
+    p_run.y_range.flipped = False
+    p_run.xaxis.axis_label = f"task execution time ({time_units})"
+    p_run.yaxis.visible = False
+    p_run.legend.location = "top_right"
+    max_stop_time = data["stopTime"].max()
+    min_creation_time = data["creationTime"].min()
+    p_run.title.text = (
+        f"{title}, "
+        f"tasks: {len(tasks)}, "
+        f"wall time: {(_parse_time_str(max_stop_time) - _parse_time_str(min_creation_time)).total_seconds() * time_scale_factor:.2f} {time_units}"
+    )
+
+    layout = column(Div(text=f"<strong>{title}</strong>"), p_run)
+
+    if show_plot:
+        show(p_run)
+
+    return layout
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,6 +21,8 @@ boto3-stubs = "^1.34.160"
 botocore-stubs = "^1.34.160"
 docopt = "^0.6.2"
 python-dateutil = "^2.8.2"
+bokeh = "^3.5.1"
+pandas = "^2.2.2"
 
 [tool.poetry.group.dev.dependencies]
 black = ">=22.8,<25.0"