Skip to content

Commit

Permalink
Show only relevant benchmarks in CI summary
Browse files Browse the repository at this point in the history
This commit adds a new key to the benchcomp markdown table
visualization. If `delete_row_if_empty` is set to true on a table's
extra column, then for each benchmark, if the extra column evaluates to
an empty string, then the benchmark will not be displayed in the table.

The change to the CI configuration means that benchmarks whose
solver, symex, or total runtime changed by less than 10% will not be
displayed in the tables for those metrics. The number of VCCs and number
of program steps tables will also not contain benchmarks for which those
numbers didn't change, and the success table will now only show
benchmarks that are either newly passing or newly failing.
  • Loading branch information
karkhaz committed Mar 27, 2024
1 parent 1c3d0f3 commit 186a13e
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ name: Kani Performance Benchmarks
on:
push:
branches:
- 'main'
- '*'
workflow_call:

jobs:
Expand Down
18 changes: 15 additions & 3 deletions tools/benchcomp/benchcomp/visualizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ class dump_markdown_results_table:
returns a string that is rendered in the benchmark's row in the new column.
This allows you to emit arbitrary text or markdown formatting in response to
particular combinations of values for different variants, such as
regressions or performance improvements.
regressions or performance improvements. If `delete_row_if_empty` is set to
`true` on the extra column, then benchmarks for which the extra row text
evaluates to an empty string will not be displayed in the table at all.
'scatterplot' takes the values 'off' (default), 'linear' (linearly scaled
axes), or 'log' (logarithmically scaled axes).
Expand All @@ -176,13 +178,16 @@ class dump_markdown_results_table:
else "**" + str(b["variant_2"]/b["variant_1"]) + "**"
success:
- column_name: change
delete_row_if_empty: true
text: >
lambda b: "" if b["variant_2"] == b["variant_1"]
else "newly passing" if b["variant_2"]
else "regressed"
```
Example output:
Example output. Note that `bench_1` does not appear in the second table
because it neither regressed or is newly passing, so the text in the extra
column is empty, and `delete_row_if_empty` is set to `true`.
```
## runtime
Expand All @@ -196,7 +201,6 @@ class dump_markdown_results_table:
| Benchmark | variant_1 | variant_2 | change |
| --- | --- | --- | --- |
| bench_1 | True | True | |
| bench_2 | True | False | regressed |
| bench_3 | False | True | newly passing |
```
Expand Down Expand Up @@ -355,10 +359,18 @@ def _add_extra_columns(self, metrics):
columns = self.extra_columns[metric]
except KeyError:
continue
to_delete = set()
for bench, variants in benches.items():
tmp_variants = dict(variants)
for column in columns:
variants[column["column_name"]] = column["text"](tmp_variants)
if all((
column.get("delete_row_if_empty"),
not variants[column["column_name"]])):
to_delete.add(bench)
break
for bench in to_delete:
benches.pop(bench)


@staticmethod
Expand Down
12 changes: 9 additions & 3 deletions tools/benchcomp/configs/perf-regression.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ visualize:
# embolden if the absolute difference is more than 10% of the old value
number_vccs:
- column_name: diff old → new
delete_row_if_empty: true
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
Expand All @@ -48,6 +49,7 @@ visualize:
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
number_program_steps:
- column_name: diff old → new
delete_row_if_empty: true
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
Expand All @@ -59,24 +61,27 @@ visualize:
# cells whose absolute change is >50%
solver_runtime:
- column_name: "% change old → new"
delete_row_if_empty: true
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
lambda b: "" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) < 0.1
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
+ ("+" if b["kani_new"] > b["kani_old"] else "")
+ "%.3f%%" % ((b["kani_new"] - b["kani_old"]) * 100 / b["kani_old"])
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
verification_time:
- column_name: "% change old → new"
delete_row_if_empty: true
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
lambda b: "" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) < 0.1
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
+ ("+" if b["kani_new"] > b["kani_old"] else "")
+ "%.3f%%" % ((b["kani_new"] - b["kani_old"]) * 100 / b["kani_old"])
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
symex_runtime:
- column_name: "% change old → new"
delete_row_if_empty: true
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
lambda b: "" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) < 0.1
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
+ ("+" if b["kani_new"] > b["kani_old"] else "")
+ "%.3f%%" % ((b["kani_new"] - b["kani_old"]) * 100 / b["kani_old"])
Expand All @@ -85,6 +90,7 @@ visualize:
# For success metric, display some text if success has changed
success:
- column_name: change
delete_row_if_empty: true
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
else "❌ newly failing" if b["kani_old"]
Expand Down

0 comments on commit 186a13e

Please sign in to comment.