Skip to content

Commit

Permalink
handle missing values and aggregation of a single run with scatters
Browse files Browse the repository at this point in the history
  • Loading branch information
markjschreiber committed Sep 27, 2024
1 parent 7ba54ac commit 6edbb73
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
17 changes: 10 additions & 7 deletions omics/cli/run_analyzer/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,16 +104,19 @@ def _do_aggregation(resources_list: list[dict], resource_key: str, operation: st
# special case for instance types
instances = []
for r in resources_list:
instances.append(r["metrics"][resource_key])
if resource_key in r["metrics"]:
instances.append(r["metrics"][resource_key])
return max(instances, key=lambda x: utils.omics_instance_weight(x))
else:
try:
return max([r["metrics"][resource_key] for r in resources_list])
except KeyError:
print(f"KeyError for {resource_key} in {resources_list}", file=sys.stderr)
return round(max([r["metrics"].get(resource_key, 0.0) for r in resources_list]), 4)
elif operation == "mean":
return round(statistics.mean([r["metrics"][resource_key] for r in resources_list]), 2)
data = [r["metrics"].get(resource_key, 0.0) for r in resources_list]
return round(statistics.mean(data=data), 4)
elif operation == "stdDev":
return round(statistics.stdev([r["metrics"][resource_key] for r in resources_list]), 2)
data = [r["metrics"].get(resource_key, 0.0) for r in resources_list]
if len(data) > 1:
return round(statistics.stdev(data=data), 4)
else:
return 0.000
else:
raise ValueError(f"Invalid aggregation operation: {operation}")
4 changes: 3 additions & 1 deletion tests/cli/run_analyzer/unit/test_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def test_aggregate_and_print_resources(self):
],
]
header_string = ",".join(batch.hdrs) + "\n"
expected = header_string + "task,foo,4,20.0,30.0,8.16,1.0,1.0,0,4,8,omics.c.large,1.0,1.0\n"
expected = (
header_string + "task,foo,4,20.0,30.0,8.165,1.0,1.0,0,4,8,omics.c.large,1.0,1.0\n"
)
with io.StringIO() as result:
batch.aggregate_and_print(
run_resources_list=resources_list, pricing={}, engine="WDL", out=result
Expand Down

0 comments on commit 6edbb73

Please sign in to comment.