awslabs · KevinSayers · Sep 24, 2024 · Aug 27, 2024 · Aug 29, 2024 · Sep 6, 2024
diff --git a/omics/cli/run_analyzer/__main__.py b/omics/cli/run_analyzer/__main__.py
@@ -12,6 +12,7 @@
                           [--out=<path>]
                           [--plot=<directory>]
                           [--headroom=<float>]
+                          [--config=<path>]
                           [--help]
 
 Options:
@@ -24,6 +25,7 @@
  -o, --out=<path>         Write output to file
  -P, --plot=<directory>   Plot a run timeline to a directory
  -H, --headroom=<float>   Adds a fractional buffer to the size of recommended memory and CPU. Values must be between 0.0 and 1.0.
+ -c, --config=<path>      Output a config file with recommended resources
  -h, --help               Show help text
 
 Examples:
@@ -54,7 +56,7 @@
 import dateutil.utils
 import docopt
 from bokeh.plotting import output_file
-
+import textwrap
 from . import timeline  # type: ignore
 
 exename = os.path.basename(sys.argv[0])
@@ -410,6 +412,34 @@ def get_timeline_event(res, resources):
         "running": (time3 - time2).total_seconds(),
     }
 
+def create_config(engine, task_resources, filename):
+
+    if engine == 'NEXTFLOW':
+        with open(filename, 'w') as out:
+            for task in task_resources:
+                task_string = textwrap.dedent(f"""
+                withName: {task} {{
+                    cpu = {task_resources[task]['cpus']}
+                    memory = {task_resources[task]['mem']}
+                }}
+                """)
+                out.write(task_string)
+
+    elif engine == 'CWL':
+        with open(filename, "w") as out:
+            for task in task_resources:
+                task_string = textwrap.dedent(f"""
+                {task}:
+                    coresMin: {task_resources[task]['cpus']}
+                    ramMin: {task_resources[task]['mem']}
+                """)
+                out.write(task_string)
+    elif engine == 'WDL':
+        pass
+    else:
+        raise ValueError("Unknown workflow engine")
+
+
 
 if __name__ == "__main__":
     # Parse command-line options
@@ -522,11 +552,33 @@ def tocsv(val):
 
             writer = csv.writer(out, lineterminator="\n")
             writer.writerow(formatted_headers)
+            config = {}
+
             for res in resources:
                 add_metrics(res, resources, pricing, headroom)
                 metrics = res.get("metrics", {})
+                if res['type'] == 'run':
+                    omics = session.client("omics")
+                    wfid = res['workflow'].split('/')[-1]
+                    engine = omics.get_workflow(id=wfid)['engine']
+                if res['type'] == 'task':
+                    task_name = res['name'].split(" ")[0]
+                    if task_name not in config.keys():
+                        config[task_name] ={
+                            'cpus': metrics['recommendedCpus'],
+                            'mem': metrics['recommendedMemoryGiB']
+                        }
+                    else:
+                        config[task_name] ={
+                            'cpus': max(metrics['recommendedCpus'], config[task_name]['cpus']),
+                            'mem': max(metrics['recommendedMemoryGiB'], config[task_name]['mem'])
+                        }
                 row = [tocsv(metrics.get(h, res.get(h))) for h in hdrs]
                 writer.writerow(row)
+
+            if opts["--config"]:
+                filename = opts['--config']
+                create_config(engine, config, filename)
         if opts["--out"]:
             sys.stderr.write(f"{exename}: wrote {opts['--out']}\n")
     if opts["--plot"]:
@@ -558,3 +610,4 @@ def tocsv(val):
         title = f"arn: {run['arn']}, name: {run.get('name')}"
 
         timeline.plot_timeline(resources, title=title, max_duration_hrs=run_duration_hrs)
+