diff --git a/README.md b/README.md index 081d7bf..3cb7b91 100644 --- a/README.md +++ b/README.md @@ -432,6 +432,15 @@ this returns something like: omics-run-analyzer: wrote run-1234567.json ``` +#### Output optimized configuration +> [!WARNING] +> Currently this feature only supports Nextflow workflows. + +The `--write-config` option will write a new configuration file with the `recommendedCpus` and `recommendedMemoryGiB` as the resource requirements. This will take the maximum values if the task is run multiple times with different inputs. + +```bash +python -m omics.cli.run_analyzer 123456 --write-config=optimized.config +``` ## Security See [CONTRIBUTING](https://github.com/awslabs/amazon-omics-tools/blob/main/CONTRIBUTING.md#security-issue-notifications) for more information. diff --git a/omics/cli/run_analyzer/__main__.py b/omics/cli/run_analyzer/__main__.py index d5ad679..c0b9d21 100755 --- a/omics/cli/run_analyzer/__main__.py +++ b/omics/cli/run_analyzer/__main__.py @@ -12,6 +12,7 @@ [--out=] [--plot=] [--headroom=] + [--write-config=] [--help] Options: @@ -24,6 +25,7 @@ -o, --out= Write output to file -P, --plot= Plot a run timeline to a directory -H, --headroom= Adds a fractional buffer to the size of recommended memory and CPU. Values must be between 0.0 and 1.0. + -c, --write-config= Output a config file with recommended resources (Nextflow only) -h, --help Show help text Examples: @@ -54,8 +56,8 @@ import dateutil.utils import docopt from bokeh.plotting import output_file - from . import timeline # type: ignore +from . import writeconfig exename = os.path.basename(sys.argv[0]) OMICS_LOG_GROUP = "/aws/omics/WorkflowLog" @@ -410,7 +412,6 @@ def get_timeline_event(res, resources): "running": (time3 - time2).total_seconds(), } - if __name__ == "__main__": # Parse command-line options opts = docopt.docopt(__doc__) @@ -522,11 +523,33 @@ def tocsv(val): writer = csv.writer(out, lineterminator="\n") writer.writerow(formatted_headers) + config = {} + for res in resources: add_metrics(res, resources, pricing, headroom) metrics = res.get("metrics", {}) + if res['type'] == 'run': + omics = session.client("omics") + wfid = res['workflow'].split('/')[-1] + engine = omics.get_workflow(id=wfid)['engine'] + if res['type'] == 'task': + task_name = writeconfig.get_base_task(engine, res['name']) + if task_name not in config.keys(): + config[task_name] ={ + 'cpus': metrics['recommendedCpus'], + 'mem': metrics['recommendedMemoryGiB'] + } + else: + config[task_name] ={ + 'cpus': max(metrics['recommendedCpus'], config[task_name]['cpus']), + 'mem': max(metrics['recommendedMemoryGiB'], config[task_name]['mem']) + } row = [tocsv(metrics.get(h, res.get(h))) for h in hdrs] writer.writerow(row) + + if opts["--write-config"]: + filename = opts['--write-config'] + writeconfig.create_config(engine, config, filename) if opts["--out"]: sys.stderr.write(f"{exename}: wrote {opts['--out']}\n") if opts["--plot"]: @@ -558,3 +581,4 @@ def tocsv(val): title = f"arn: {run['arn']}, name: {run.get('name')}" timeline.plot_timeline(resources, title=title, max_duration_hrs=run_duration_hrs) + diff --git a/omics/cli/run_analyzer/writeconfig.py b/omics/cli/run_analyzer/writeconfig.py new file mode 100644 index 0000000..a1950a7 --- /dev/null +++ b/omics/cli/run_analyzer/writeconfig.py @@ -0,0 +1,32 @@ +import textwrap + +def create_config(engine, task_resources, filename): + if engine == 'NEXTFLOW': + with open(filename, 'w') as out: + for task in task_resources: + task_string = textwrap.dedent(f""" + withName: {task} {{ + cpu = {task_resources[task]['cpus']} + memory = {task_resources[task]['mem']} + }} + """) + out.write(task_string) + + elif engine == 'CWL': + raise ValueError("--write-config does not currently support CWL workflows") + elif engine == 'WDL': + raise ValueError("--write-config does not currently support WDL workflows") + else: + raise ValueError("Unknown workflow engine") + +def get_base_task(engine, task): + # Returns the base task name + if engine == 'NEXTFLOW': + individual_task = task.split(" ")[0] + return individual_task + elif engine == 'CWL': + return task + elif engine == 'WDL': + return task + else: + raise ValueError("Unknown workflow engine") \ No newline at end of file diff --git a/tests/run_analyzer/__init__.py b/tests/run_analyzer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/run_analyzer/unit/test_writeconfig.py b/tests/run_analyzer/unit/test_writeconfig.py new file mode 100644 index 0000000..4b4151e --- /dev/null +++ b/tests/run_analyzer/unit/test_writeconfig.py @@ -0,0 +1,14 @@ +import unittest +from omics.cli.run_analyzer import writeconfig + +class TestGetBaseTask(unittest.TestCase): + def test_get_base_task_nextflow(self): + result = writeconfig.get_base_task('NEXTFLOW', 'task1 (sample1)') + self.assertEqual(result, 'task1') + + def test_get_base_task_cwl(self): + result = writeconfig.get_base_task('CWL', 'task1 (sample1)') + self.assertRaises(ValueError) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file