-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add option to output a new config file #63
Changes from 2 commits
bd9fc16
7138ded
8bb899c
92b04f1
8ea9adc
288157f
2a76bdb
440533b
0c2724f
7ffb8ee
b8f4b5a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
[--out=<path>] | ||
[--plot=<directory>] | ||
[--headroom=<float>] | ||
[--config=<path>] | ||
[--help] | ||
|
||
Options: | ||
|
@@ -24,6 +25,7 @@ | |
-o, --out=<path> Write output to file | ||
-P, --plot=<directory> Plot a run timeline to a directory | ||
-H, --headroom=<float> Adds a fractional buffer to the size of recommended memory and CPU. Values must be between 0.0 and 1.0. | ||
-c, --config=<path> Output a config file with recommended resources | ||
-h, --help Show help text | ||
|
||
Examples: | ||
|
@@ -54,7 +56,7 @@ | |
import dateutil.utils | ||
import docopt | ||
from bokeh.plotting import output_file | ||
|
||
import textwrap | ||
from . import timeline # type: ignore | ||
|
||
exename = os.path.basename(sys.argv[0]) | ||
|
@@ -410,6 +412,34 @@ def get_timeline_event(res, resources): | |
"running": (time3 - time2).total_seconds(), | ||
} | ||
|
||
def create_config(engine, task_resources, filename): | ||
|
||
if engine == 'NEXTFLOW': | ||
with open(filename, 'w') as out: | ||
for task in task_resources: | ||
task_string = textwrap.dedent(f""" | ||
withName: {task} {{ | ||
cpu = {task_resources[task]['cpus']} | ||
memory = {task_resources[task]['mem']} | ||
}} | ||
""") | ||
out.write(task_string) | ||
|
||
elif engine == 'CWL': | ||
with open(filename, "w") as out: | ||
for task in task_resources: | ||
task_string = textwrap.dedent(f""" | ||
{task}: | ||
coresMin: {task_resources[task]['cpus']} | ||
ramMin: {task_resources[task]['mem']} | ||
""") | ||
out.write(task_string) | ||
elif engine == 'WDL': | ||
pass | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Raise an error saying we don't support WDL |
||
else: | ||
raise ValueError("Unknown workflow engine") | ||
|
||
|
||
|
||
if __name__ == "__main__": | ||
# Parse command-line options | ||
|
@@ -522,11 +552,33 @@ def tocsv(val): | |
|
||
writer = csv.writer(out, lineterminator="\n") | ||
writer.writerow(formatted_headers) | ||
config = {} | ||
|
||
for res in resources: | ||
add_metrics(res, resources, pricing, headroom) | ||
metrics = res.get("metrics", {}) | ||
if res['type'] == 'run': | ||
omics = session.client("omics") | ||
wfid = res['workflow'].split('/')[-1] | ||
engine = omics.get_workflow(id=wfid)['engine'] | ||
if res['type'] == 'task': | ||
task_name = res['name'].split(" ")[0] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this always work? Do we know that our engines can't produce a task name like "[my workflow task]" with spaces? It might make this more future proof to have a constant for the task name split string because if we need to change it due to new engines, this line of code doesn't advertise it's intent. Alternatively, perhaps have a function to split the name, perhaps with an engine name as an argument if different engines do different things. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will add a function that can parse the base task name and enforce the format of it. |
||
if task_name not in config.keys(): | ||
config[task_name] ={ | ||
'cpus': metrics['recommendedCpus'], | ||
'mem': metrics['recommendedMemoryGiB'] | ||
} | ||
else: | ||
config[task_name] ={ | ||
'cpus': max(metrics['recommendedCpus'], config[task_name]['cpus']), | ||
'mem': max(metrics['recommendedMemoryGiB'], config[task_name]['mem']) | ||
} | ||
row = [tocsv(metrics.get(h, res.get(h))) for h in hdrs] | ||
writer.writerow(row) | ||
|
||
if opts["--config"]: | ||
filename = opts['--config'] | ||
create_config(engine, config, filename) | ||
if opts["--out"]: | ||
sys.stderr.write(f"{exename}: wrote {opts['--out']}\n") | ||
if opts["--plot"]: | ||
|
@@ -558,3 +610,4 @@ def tocsv(val): | |
title = f"arn: {run['arn']}, name: {run.get('name')}" | ||
|
||
timeline.plot_timeline(resources, title=title, max_duration_hrs=run_duration_hrs) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
specify that it is a Nextflow style config file
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or Nextflow / CWL