Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Timeline integration #55

Merged
merged 9 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,16 @@ The CSV output by the command above includes the following columns:
> [!WARNING]
> At this time AWS HealthOmics does not report the average or maximum storage used by runs that use "DYNAMIC" storage that run for under two hours. Because of this limitation the `storageMaximumGiB` and `storageAverageGiB` are set to zero and will not be included in the estimate run cost.

#### Produce a timeline plot for a run

The RunAnalyzer tool can produce an interative timeline plot of a workflow. The plots allow you to visualize how individual tasks ran over the course of the run.

```bash
python -m omics.cli.run_analyzer -P plots/ 7113639
```

![Example time line image showing stacked horizontal bars indicating the time taken by each task](./assets/timeline.png)

#### Output workflow run manifest in JSON format

```bash
Expand Down
Binary file added assets/timeline.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
34 changes: 34 additions & 0 deletions omics/cli/run_analyzer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
[--timeline]
[--file=<path>]
[--out=<path>]
[--plot=<directory>]
[--help]

Options:
Expand All @@ -20,6 +21,7 @@
-T, --timeline Show workflow run timeline
-f, --file=<path> Load input from file
-o, --out=<path> Write output to file
-P, --plot=<directory> Plot a run timeline to a directory
-h, --help Show help text

Examples:
Expand All @@ -42,7 +44,10 @@

import boto3
import dateutil
import dateutil.utils
import docopt
import timeline # type: ignore
from bokeh.plotting import output_file

exename = os.path.basename(sys.argv[0])
OMICS_LOG_GROUP = "/aws/omics/WorkflowLog"
Expand Down Expand Up @@ -493,3 +498,32 @@ def tocsv(val):
writer.writerow(row)
if opts["--out"]:
sys.stderr.write(f"{exename}: wrote {opts['--out']}\n")
if opts["--plot"]:
if len(resources) < 1:
die("no resources to plot")

run = {}
for res in resources:
rtype = re.split(r"[:/]", res["arn"])[-2]
if rtype == "run":
run = res
resources.remove(res) # we don't want the run in the data to plot
break

start = datetime.datetime.strptime(run["startTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
stop = datetime.datetime.strptime(run["stopTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
run_duration_hrs = (stop - start).total_seconds() / 3600

runid = run["arn"].split("/")[-1]
output_file_basename = f"{runid}_timeline"

# open or create the plot directory
plot_dir = opts["--plot"]
if not os.path.isdir(plot_dir):
os.makedirs(plot_dir)
output_file(
filename=os.path.join(plot_dir, f"{output_file_basename}.html"), title=runid, mode="cdn"
)
title = f"arn: {run['arn']}, name: {run.get('name')}"

timeline.plot_timeline(resources, title=title, max_duration_hrs=run_duration_hrs)
139 changes: 139 additions & 0 deletions omics/cli/run_analyzer/timeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import sys
from datetime import datetime

import pandas as pd # type: ignore
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Div, Range1d
from bokeh.plotting import figure, show

TIME_SCALE_FACTORS = {"sec": 1, "min": 1 / 60, "hr": 1 / 3600, "day": 1 / 86400}

TASK_COLORS = {"COMPLETED": "cornflowerblue", "FAILED": "crimson", "CANCELLED": "orange"}


def _parse_time_str(time_str):
# if time_str is actually a datetime just return it
if isinstance(time_str, datetime):
return time_str

try:
return datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S.%fZ")
except ValueError:
return datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%S%fZ")


def _get_task_timings_data(tasks, time_units="min"):
time_scale_factor = TIME_SCALE_FACTORS[time_units]

# remove tasks that don't have a creationTime
filtered_tasks = [task for task in tasks if task.get("creationTime")]
if not filtered_tasks:
print("No tasks found with timing data, a plot cannot be created", file=sys.stderr)
return pd.DataFrame()

tare = min([_parse_time_str(task["creationTime"]) for task in filtered_tasks])

for i, task in enumerate(tasks):
if "creationTime" not in task:
task["creationTime"] = tare
task["startTime"] = task["creationTime"]
task["stopTime"] = task["creationTime"]
else:
task["creationTime"] = _parse_time_str(task["creationTime"])
task["startTime"] = _parse_time_str(task["startTime"])
task["stopTime"] = _parse_time_str(task["stopTime"])
task["cpus"] = task.get("cpus", 0)
task["gpus"] = task.get("gpus", 0)
task["memory"] = task.get("memory", 0)
task["instanceType"] = task.get("instanceType", "N/A")

task["y"] = i
task["color"] = TASK_COLORS[task["status"]]

task["running_left"] = (task["startTime"] - tare).total_seconds() * time_scale_factor
task["running_right"] = (task["stopTime"] - tare).total_seconds() * time_scale_factor
task["running_duration"] = task["running_right"] - task["running_left"]

task["queued_left"] = (task["creationTime"] - tare).total_seconds() * time_scale_factor
task["queued_right"] = task["running_left"]
task["queued_duration"] = task["queued_right"] - task["queued_left"]

task["label"] = f"({task['arn']}) {task['name']}"
task["text_x"] = (task["stopTime"] - tare).total_seconds() + 30 * time_scale_factor

tasks[i] = task
task["estimatedUSD"] = task["metrics"].get("estimatedUSD", 0.0)

return pd.DataFrame.from_records(tasks).sort_values("creationTime")


def plot_timeline(tasks, title="", time_units="min", max_duration_hrs=5, show_plot=True):
"""Plot a time line figure for supplied tasks"""
time_scale_factor = TIME_SCALE_FACTORS[time_units]
data = _get_task_timings_data(tasks, time_units=time_units)

source = ColumnDataSource(data)

tooltips = [
("taskId", "@arn"),
("name", "@name"),
("cpus", "@cpus"),
("gpus", "@gpus"),
("memory", "@memory GiB"),
("instanceType", "@instanceType"),
("queued", f"@queued_duration {time_units}"),
("duration", f"@running_duration {time_units}"),
("status", "@status"),
("est. cost USD", "@estimatedUSD"),
]

p_run = figure(width=960, height=800, sizing_mode="stretch_both", tooltips=tooltips)
p_run.hbar(
y="y",
left="queued_left",
right="queued_right",
height=0.8,
color="lightgrey",
source=source,
legend_label="queued",
)
p_run.hbar(
y="y",
left="running_left",
right="running_right",
height=0.8,
color="color",
source=source,
legend_label="running",
)
if len(data) < 50:
p_run.text(
x="text_x",
y="y",
text="name",
alpha=0.4,
text_baseline="middle",
text_font_size="1.5ex",
source=source,
)
x_max = max_duration_hrs * 3600 * time_scale_factor # max expected workflow duration in hours
x_min = -(x_max * 0.05)
p_run.x_range = Range1d(x_min, x_max)
p_run.y_range.flipped = False
p_run.xaxis.axis_label = f"task execution time ({time_units})"
p_run.yaxis.visible = False
p_run.legend.location = "top_right"
max_stop_time = data["stopTime"].max()
min_creation_time = data["creationTime"].min()
p_run.title.text = (
f"{title}, "
f"tasks: {len(tasks)}, "
f"wall time: {(_parse_time_str(max_stop_time) - _parse_time_str(min_creation_time)).total_seconds() * time_scale_factor:.2f} {time_units}"
)

layout = column(Div(text=f"<strong>{title}</strong>"), p_run)

if show_plot:
show(p_run)

return layout
1,333 changes: 925 additions & 408 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ boto3-stubs = "^1.34.160"
botocore-stubs = "^1.34.160"
docopt = "^0.6.2"
python-dateutil = "^2.8.2"
bokeh = "^3.5.1"
pandas = "^2.2.2"

[tool.poetry.group.dev.dependencies]
black = ">=22.8,<25.0"
Expand Down
Loading