Skip to content

Commit

Permalink
Add option to output a new config file (#63)
Browse files Browse the repository at this point in the history
Add write-config option to the runanalyzer. This provides an optimized configuration file. 

---------

Co-authored-by: Kevin Sayers <[email protected]>
Co-authored-by: Mark Schreiber <[email protected]>
  • Loading branch information
3 people authored Sep 24, 2024
1 parent 40da936 commit e2a8286
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 2 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,15 @@ this returns something like:
omics-run-analyzer: wrote run-1234567.json
```

#### Output optimized configuration
> [!WARNING]
> Currently this feature only supports Nextflow workflows.

The `--write-config` option writes a new configuration file that uses each task's `recommendedCpus` and `recommendedMemoryGiB` values as its resource requirements. If a task runs multiple times with different inputs, the maximum recommended values across those runs are used.

```bash
python -m omics.cli.run_analyzer 123456 --write-config=optimized.config
```
## Security

See [CONTRIBUTING](https://github.com/awslabs/amazon-omics-tools/blob/main/CONTRIBUTING.md#security-issue-notifications) for more information.
Expand Down
28 changes: 26 additions & 2 deletions omics/cli/run_analyzer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
[--out=<path>]
[--plot=<directory>]
[--headroom=<float>]
[--write-config=<path>]
[--help]
Options:
Expand All @@ -24,6 +25,7 @@
-o, --out=<path> Write output to file
-P, --plot=<directory> Plot a run timeline to a directory
-H, --headroom=<float> Adds a fractional buffer to the size of recommended memory and CPU. Values must be between 0.0 and 1.0.
-c, --write-config=<path> Output a config file with recommended resources (Nextflow only)
-h, --help Show help text
Examples:
Expand Down Expand Up @@ -54,8 +56,8 @@
import dateutil.utils
import docopt
from bokeh.plotting import output_file

from . import timeline # type: ignore
from . import writeconfig

exename = os.path.basename(sys.argv[0])
OMICS_LOG_GROUP = "/aws/omics/WorkflowLog"
Expand Down Expand Up @@ -407,7 +409,6 @@ def get_timeline_event(res, resources):
"running": (time3 - time2).total_seconds(),
}


if __name__ == "__main__":
# Parse command-line options
opts = docopt.docopt(__doc__)
Expand Down Expand Up @@ -519,11 +520,33 @@ def tocsv(val):

writer = csv.writer(out, lineterminator="\n")
writer.writerow(formatted_headers)
config = {}

for res in resources:
add_metrics(res, resources, pricing, headroom)
metrics = res.get("metrics", {})
if res['type'] == 'run':
omics = session.client("omics")
wfid = res['workflow'].split('/')[-1]
engine = omics.get_workflow(id=wfid)['engine']
if res['type'] == 'task':
task_name = writeconfig.get_base_task(engine, res['name'])
if task_name not in config.keys():
config[task_name] ={
'cpus': metrics['recommendedCpus'],
'mem': metrics['recommendedMemoryGiB']
}
else:
config[task_name] ={
'cpus': max(metrics['recommendedCpus'], config[task_name]['cpus']),
'mem': max(metrics['recommendedMemoryGiB'], config[task_name]['mem'])
}
row = [tocsv(metrics.get(h, res.get(h))) for h in hdrs]
writer.writerow(row)

if opts["--write-config"]:
filename = opts['--write-config']
writeconfig.create_config(engine, config, filename)
if opts["--out"]:
sys.stderr.write(f"{exename}: wrote {opts['--out']}\n")
if opts["--plot"]:
Expand Down Expand Up @@ -555,3 +578,4 @@ def tocsv(val):
title = f"arn: {run['arn']}, name: {run.get('name')}"

timeline.plot_timeline(resources, title=title, max_duration_hrs=run_duration_hrs)

32 changes: 32 additions & 0 deletions omics/cli/run_analyzer/writeconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import textwrap

def create_config(engine, task_resources, filename):
    """Write a workflow config file containing per-task resource settings.

    Only Nextflow is supported. The generated file holds a single
    ``process`` scope with one ``withName`` selector per task.

    Args:
        engine: Workflow engine name ('NEXTFLOW', 'CWL' or 'WDL').
        task_resources: Mapping of task name to a dict with the keys
            'cpus' and 'mem' (the caller passes the recommended memory
            in GiB).
        filename: Path of the config file to write; an existing file is
            overwritten.

    Raises:
        ValueError: If the engine is CWL or WDL (not supported yet) or is
            not a known engine. No file is written in that case.
    """
    if engine == 'NEXTFLOW':
        # Build the whole document first so that a malformed entry cannot
        # leave a half-written config file behind.
        # `withName` selectors are only honoured inside the `process` scope.
        lines = ["process {"]
        for task, resources in task_resources.items():
            lines.extend([
                # Quote the selector so names with special characters parse.
                f"    withName: '{task}' {{",
                # The Nextflow directive is `cpus` (plural).
                f"        cpus = {resources['cpus']}",
                # Memory needs an explicit unit.
                # NOTE(review): Nextflow's 'GB' is understood to be
                # 1024-based, matching the GiB input - confirm.
                f"        memory = '{resources['mem']} GB'",
                "    }",
            ])
        lines.append("}")
        with open(filename, 'w', encoding='utf-8') as out:
            out.write("\n".join(lines) + "\n")
    elif engine == 'CWL':
        raise ValueError("--write-config does not currently support CWL workflows")
    elif engine == 'WDL':
        raise ValueError("--write-config does not currently support WDL workflows")
    else:
        raise ValueError("Unknown workflow engine")

def get_base_task(engine, task):
    """Return the base task name for a task of the given workflow engine.

    For Nextflow the name is cut at the first space, which drops a
    trailing per-invocation suffix such as " (sample1)". CWL and WDL
    task names are returned unchanged.

    Raises:
        ValueError: If the engine is not NEXTFLOW, CWL or WDL.
    """
    # Engines whose task names need no normalisation.
    if engine in ('CWL', 'WDL'):
        return task
    if engine != 'NEXTFLOW':
        raise ValueError("Unknown workflow engine")
    base_name, *_ = task.split(" ")
    return base_name
Empty file added tests/run_analyzer/__init__.py
Empty file.
14 changes: 14 additions & 0 deletions tests/run_analyzer/unit/test_writeconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import unittest
from omics.cli.run_analyzer import writeconfig

class TestGetBaseTask(unittest.TestCase):
    """Unit tests for writeconfig.get_base_task."""

    def test_get_base_task_nextflow(self):
        # Nextflow task names are cut at the first space.
        result = writeconfig.get_base_task('NEXTFLOW', 'task1 (sample1)')
        self.assertEqual(result, 'task1')

    def test_get_base_task_cwl(self):
        # CWL task names are returned unchanged; no exception is raised.
        result = writeconfig.get_base_task('CWL', 'task1 (sample1)')
        self.assertEqual(result, 'task1 (sample1)')

    def test_get_base_task_unknown_engine(self):
        # assertRaises only checks anything when used as a context manager
        # or given a callable.
        with self.assertRaises(ValueError):
            writeconfig.get_base_task('UNKNOWN', 'task1 (sample1)')

if __name__ == '__main__':
unittest.main()

0 comments on commit e2a8286

Please sign in to comment.