Skip to content

Commit 12c343e

Browse files
authored
Add benchcomp (#2274)
This PR adds benchcomp, a tool for comparing one or more suites of benchmarks using two or more 'variants' (command line arguments and environment variables). benchcomp runs all combinations of suite x variant, parsing the unique output formats of each of these runs. benchcomp then combines the parsed outputs and writes them into a single file. benchcomp can post-process that combined file to create visualizations, exit if the results are not as expected, or perform other actions.
1 parent 56e7f93 commit 12c343e

21 files changed

+894
-0
lines changed

tools/benchcomp/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# the regression tests write result.yaml files into their directories
2+
result.yaml

tools/benchcomp/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Benchcomp
2+
3+
This directory contains `bin/benchcomp`, a tool for comparing one or
4+
more suites of benchmarks using two or more 'variants' (command line
5+
arguments and environment variables).
6+
7+
`benchcomp` runs all combinations of suite x variant, parsing the unique
8+
output formats of each of these runs. `benchcomp` then combines the
9+
parsed outputs and writes them into a single file. `benchcomp` can
10+
post-process that combined file to create visualizations, exit if the
11+
results are not as expected, or perform other actions.

tools/benchcomp/benchcomp/__init__.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# Copyright Kani Contributors
2+
# SPDX-License-Identifier: Apache-2.0 OR MIT
3+
#
4+
# Common utilities for benchcomp
5+
6+
7+
import argparse
8+
import collections
9+
import contextlib
10+
import dataclasses
11+
import logging
12+
import pathlib
13+
import sys
14+
import textwrap
15+
16+
import yaml
17+
18+
19+
# Dict-like wrapper (collections.UserDict) around a benchcomp configuration
# file. `_schema` below is YAML text that is parsed at load time and handed
# to a validator; it mentions the `variants`, `run`, `filter` and `visualize`
# sections of the configuration.
# NOTE(review): the nesting of the YAML keys inside `_schema` is not visible
# here (every line starts at column 0) -- confirm the intended structure
# against the upstream file before editing the schema.
class ConfigFile(collections.UserDict):
20+
_schema: str = textwrap.dedent("""\
21+
variants:
22+
type: dict
23+
keysrules:
24+
type: string
25+
valuesrules:
26+
schema:
27+
config:
28+
type: dict
29+
keysrules:
30+
type: string
31+
valuesrules:
32+
allow_unknown: true
33+
schema:
34+
command_line:
35+
type: string
36+
directory:
37+
type: string
38+
env:
39+
type: dict
40+
keysrules:
41+
type: string
42+
valuesrules:
43+
type: string
44+
run:
45+
type: dict
46+
keysrules:
47+
type: string
48+
schema:
49+
suites:
50+
type: dict
51+
keysrules:
52+
type: string
53+
valuesrules:
54+
schema:
55+
variants:
56+
type: list
57+
parser:
58+
type: dict
59+
keysrules:
60+
type: string
61+
valuesrules:
62+
anyof:
63+
- schema:
64+
type: {}
65+
filter: {}
66+
visualize: {}
67+
""")
68+
69+
def __init__(self, path):
    """Load the YAML configuration file at `path` and validate it.

    The parsed document becomes the contents of this mapping
    (`self.data`). Validation against `self._schema` is performed only
    when the optional `cerberus` package can be imported; without it the
    document is accepted as parsed.

    Args:
        path: location of the YAML configuration file.

    Raises:
        argparse.ArgumentTypeError: if the file cannot be read, is not
            valid YAML, does not contain a mapping at the top level, or
            fails schema validation. This exception type lets argparse
            print the message when the class is used as an argument
            `type`.
    """
    super().__init__()

    try:
        with open(path, encoding="utf-8") as handle:
            data = yaml.safe_load(handle)
    except FileNotFoundError as exc:
        raise argparse.ArgumentTypeError(
            f"{path}: file not found") from exc
    except OSError as exc:
        # Permission problems, `path` being a directory, etc. are not
        # "file not found"; report what actually went wrong.
        raise argparse.ArgumentTypeError(
            f"{path}: cannot read file: {exc.strerror or exc}") from exc
    except yaml.YAMLError as exc:
        raise argparse.ArgumentTypeError(
            f"{path}: not valid YAML: {exc}") from exc

    # An empty file parses to None and a bare scalar/list is not a
    # configuration either; neither can back a UserDict.
    if not isinstance(data, dict):
        raise argparse.ArgumentTypeError(
            f"config file '{path}': top level must be a mapping")

    schema = yaml.safe_load(self._schema)
    try:
        import cerberus
    except ImportError:
        # cerberus is optional: without it, skip validation entirely.
        self.data = data
        return

    validate = cerberus.Validator(schema)
    if not validate(data):
        # `_errors` exposes the individual error objects (path, rule,
        # constraint, value) needed for a precise message.
        for error in validate._errors:
            # Path components are ints when the error is inside a list.
            doc_path = "/".join(str(part) for part in error.document_path)
            msg = (
                f"config file '{path}': key "
                f"'{doc_path}': expected "
                f"{error.constraint}, got '{error.value}'")
            if error.rule:
                msg += f" (rule {error.rule})"
            msg += f" while traversing {error.schema_path}"
            logging.error(msg)
        logging.error(validate.document_error_tree["variants"])
        raise argparse.ArgumentTypeError(
            "failed to validate configuration file")
    self.data = data
100+
101+
102+
@dataclasses.dataclass
class Outfile:
    """An output destination: a file on disk, or stdout if given '-'.

    Calling an instance returns a context manager that yields a writable
    text handle:

        with Outfile("result.yaml")() as handle:
            handle.write(text)
    """

    # Destination path, or "-" for standard output
    path: str

    def __str__(self):
        return str(self.path)

    @contextlib.contextmanager
    def __call__(self):
        """Yield a text handle that writes to `path` (UTF-8).

        For "-" the handle is `sys.stdout`, which is left open on exit.
        Otherwise any missing parent directories are created, the file is
        opened for writing (truncating existing content) and closed when
        the context exits.
        """
        if self.path == "-":
            yield sys.stdout
            return
        path = pathlib.Path(self.path)
        # parents=True: the destination may sit several levels below the
        # nearest existing directory (e.g. "out/2024/result.yaml").
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w", encoding="utf-8") as handle:
            yield handle

tools/benchcomp/benchcomp/cmd_args.py

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
# Copyright Kani Contributors
2+
# SPDX-License-Identifier: Apache-2.0 OR MIT
3+
#
4+
# Command line argument processing
5+
6+
7+
import argparse
8+
import importlib
9+
import pathlib
10+
import re
11+
import textwrap
12+
13+
import benchcomp
14+
import benchcomp.entry.benchcomp
15+
import benchcomp.entry.run
16+
17+
18+
def _get_epilogs():
    """Return the help epilog for the top-level parser and each subcommand.

    The result maps "top_level" and every subcommand name to text that is
    already wrapped to the default `textwrap` width, one blank line after
    each paragraph. Paragraphs in the literals below are separated by
    blank lines; each paragraph is dedented and re-flowed independently,
    so the indentation used in this source file does not leak into the
    output.
    """
    epilogs = {
        "top_level": """\
            benchcomp can help you to understand the difference between two or
            more toolchains, by running benchmarks that use those toolchains and
            comparing the results.

            benchcomp runs two or more 'variants' of a set of benchmark suites,
            and compares and visualizes the results of these variants. This
            allows you to understand the differences between the two variants,
            for example how they affect the benchmarks' performance or output or
            even whether they pass at all.

            benchcomp is structured as a pipeline of several commands. Running
            `benchcomp` runs each of them sequentially. You can run the
            subcommands manually to dump the intermediate files if required.""",
        "run": """\
            The run command writes one YAML file for each (suite, variant) pair.
            These YAML files are in "suite.yaml" format. Typically, users
            should read the combined YAML file emitted by `benchcomp collate`
            rather than the multiple YAML files written by `benchcomp run`.

            The `run` command writes its output files into a directory, which
            `collate` then reads from. By default, `run` writes the files into a
            new directory with a common prefix on each invocation, meaning that
            all previous runs are preserved without the user needing to specify
            a different directory each time. Benchcomp also creates a symbolic
            link to the latest run. Thus, the directories after several runs
            will look something like this:

              /tmp/benchcomp/suites/2F0D3DC4-0D02-4E95-B887-4759F08FA90D
              /tmp/benchcomp/suites/119F11EB-9BC0-42D8-9EC1-47DFD661AC88
              /tmp/benchcomp/suites/A3E83FE8-CD42-4118-BED3-ED89EC88BFB0
              /tmp/benchcomp/suites/latest -> /tmp/benchcomp/suites/119F11EB...

            '/tmp/benchcomp/suites' is the "out-prefix"; the UUID is the
            "out-dir"; and '/tmp/benchcomp/suites/latest' is the
            "out-symlink". Users can set each of these manually by passing
            the corresponding flag, if needed.

            Passing `--out-symlink ./latest` will place the symbolic link in the
            current directory, while keeping all runs under /tmp to avoid
            clutter. If you wish to keep all previous runs in a local directory,
            you can do so with

              `--out-prefix ./output --out-symlink ./output/latest`""",
        "filter": "",  # TODO
        "visualize": "",  # TODO
        "collate": "",
    }

    wrapper = textwrap.TextWrapper()
    ret = {}
    for subcommand, epilog in epilogs.items():
        # A run of blank (or whitespace-only) lines separates paragraphs
        paragraphs = re.split(r"\n\s*\n", epilog)
        buf = []
        for paragraph in paragraphs:
            buf.extend(wrapper.wrap(textwrap.dedent(paragraph)))
            # Blank line after every paragraph, including the last
            buf.append("")
        ret[subcommand] = "\n".join(buf)
    return ret
80+
81+
82+
def _existing_directory(arg):
    """argparse `type` callback: accept only a path to an existing directory.

    Args:
        arg: the command-line string to convert.

    Returns:
        `arg` as a `pathlib.Path`.

    Raises:
        argparse.ArgumentTypeError: if `arg` does not name an existing
            directory (a missing path or a regular file are both
            rejected). argparse shows this message to the user, whereas
            the message of a ValueError would be discarded.
    """
    path = pathlib.Path(arg)
    if not path.is_dir():
        raise argparse.ArgumentTypeError(
            f"directory '{arg}' must already exist")
    return path
87+
88+
89+
def _get_args_dict():
    """Return a declarative description of benchcomp's command line.

    The returned dict has three keys:

    * "top_level": keyword arguments for the top-level ArgumentParser;
    * "args": argument descriptions for the top-level parser (empty
      here);
    * "subparsers": keyword arguments for `add_subparsers`, plus a
      "parsers" dict mapping each subcommand name to the keyword
      arguments of its parser and an "args" list of argument
      descriptions.

    An argument description is a dict of `add_argument` keyword arguments
    with one extra key, "flags", holding the list of option strings.
    Every subcommand additionally receives its epilog and a raw
    description formatter, because the epilogs are already wrapped.
    """
    epilogs = _get_epilogs()
    ret = {
        "top_level": {
            "description":
                "Run and compare variants of a set of benchmark suites",
            "epilog": epilogs["top_level"],
            "formatter_class": argparse.RawDescriptionHelpFormatter,
        },
        "args": [],
        "subparsers": {
            "title": "benchcomp subcommands",
            "description":
                "You can invoke each stage of the benchcomp pipeline "
                "separately if required",
            "parsers": {
                "run": {
                    "help": "run all variants of all benchmark suites",
                    "args": [{
                        "flags": ["--out-prefix"],
                        "metavar": "D",
                        "type": pathlib.Path,
                        "default": benchcomp.entry.run.get_default_out_prefix(),
                        "help":
                            "write suite.yaml files to a new directory under D "
                            "(default: %(default)s)",
                    }, {
                        "flags": ["--out-dir"],
                        "metavar": "D",
                        "type": str,
                        "default": benchcomp.entry.run.get_default_out_dir(),
                        "help":
                            "write suite.yaml files to D relative to "
                            "--out-prefix (must not exist) "
                            "(default: %(default)s)",
                    }, {
                        "flags": ["--out-symlink"],
                        "metavar": "D",
                        "type": pathlib.Path,
                        "default":
                            benchcomp.entry.run.get_default_out_prefix() /
                            benchcomp.entry.run.get_default_out_symlink(),
                        "help":
                            "symbolically link D to the output directory "
                            "(default: %(default)s)",
                    }],
                },
                "collate": {
                    # Without "help" the subcommand is left out of the
                    # per-subcommand listing printed by `benchcomp --help`.
                    "help":
                        "combine the suite.yaml files of a run into a "
                        "single result file",
                    "args": [{
                        "flags": ["--suites-dir"],
                        "metavar": "D",
                        "type": _existing_directory,
                        "default":
                            benchcomp.entry.run.get_default_out_prefix() /
                            benchcomp.entry.run.get_default_out_symlink(),
                        "help":
                            "directory containing suite.yaml files "
                            "(default: %(default)s)"
                    }, {
                        "flags": ["--out-file"],
                        "metavar": "F",
                        "default": benchcomp.Outfile("result.yaml"),
                        "type": benchcomp.Outfile,
                        "help":
                            "write result to F instead of %(default)s. "
                            "'-' means print to stdout",
                    }],
                },
                "filter": {
                    "help": "transform a result by piping it through a program",
                    "args": [],
                },
                "visualize": {
                    "help": "render a result in various formats",
                    "args": [{
                        "flags": ["--result-file"],
                        "metavar": "F",
                        "default": pathlib.Path("result.yaml"),
                        "type": pathlib.Path,
                        "help":
                            "read result from F instead of %(default)s. "
                    }],
                },
            }
        }
    }
    for subcommand, info in ret["subparsers"]["parsers"].items():
        info["epilog"] = epilogs[subcommand]
        # Epilogs are pre-wrapped; keep argparse from re-flowing them
        info["formatter_class"] = argparse.RawDescriptionHelpFormatter
    return ret
179+
180+
181+
def _get_global_args():
    """Describe the options that belong to the top-level parser itself.

    Returns a list of argument descriptions: dicts of `add_argument`
    keyword arguments plus a "flags" list of option strings.
    """
    config_option = {
        "flags": ["-c", "--config"],
        "default": "benchcomp.yaml",
        "type": benchcomp.ConfigFile,
        "metavar": "F",
        "help": "read configuration from file F (default: %(default)s)",
    }
    verbose_option = {
        "flags": ["-v", "--verbose"],
        "action": "store_true",
        "help": "enable verbose output",
    }
    return [config_option, verbose_option]
193+
194+
195+
def get(argv=None):
    """Build the benchcomp argument parser and parse a command line.

    Args:
        argv: list of argument strings to parse. The default, None, makes
            argparse read `sys.argv[1:]`, which is what callers passing
            no argument have always received.

    Returns:
        The parsed `argparse.Namespace`. Its `func` attribute is the
        `main` of the chosen subcommand's module under `benchcomp.entry`,
        or `benchcomp.entry.benchcomp.main` when no subcommand is given.
    """
    ad = _get_args_dict()
    parser = argparse.ArgumentParser(**ad["top_level"])

    # Entrypoint when no subcommand is named on the command line
    parser.set_defaults(func=benchcomp.entry.benchcomp.main)

    global_args = _get_global_args()

    ad["args"].extend(global_args)
    for arg in ad["args"]:
        # "flags" are positional for add_argument; the rest are keywords
        flags = arg.pop("flags")
        parser.add_argument(*flags, **arg)

    subparsers = ad["subparsers"].pop("parsers")
    subs = parser.add_subparsers(**ad["subparsers"])
    for subcommand, info in subparsers.items():
        args = info.pop("args")
        subparser = subs.add_parser(name=subcommand, **info)

        # Set entrypoint to benchcomp.entry.visualize.main()
        # when user invokes `benchcomp visualize`, etc
        mod = importlib.import_module(f"benchcomp.entry.{subcommand}")
        subparser.set_defaults(func=mod.main)

        for arg in args:
            flags = arg.pop("flags")
            subparser.add_argument(*flags, **arg)
            # Subcommand options are also registered on the top-level
            # parser -- presumably so that a bare `benchcomp` invocation,
            # which runs the subcommands itself, accepts them too.
            if arg not in global_args:
                parser.add_argument(*flags, **arg)

    return parser.parse_args(argv)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Each file X.py in this directory contains a `main` function, which
2+
bin/benchcomp will call when you run `benchcomp X`. Running `benchcomp`
3+
with no arguments will invoke the `main` function in `benchcomp.py`.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Copyright Kani Contributors
2+
# SPDX-License-Identifier: Apache-2.0 OR MIT
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Copyright Kani Contributors
2+
# SPDX-License-Identifier: Apache-2.0 OR MIT
3+
#
4+
# Entrypoint when running `benchcomp` with no arguments. This runs the other
5+
# subcommands in sequence, for a single-command way of running, comparing, and
6+
# post-processing the suites from a single reproducible config file.
7+
8+
9+
import benchcomp.entry.collate
10+
import benchcomp.entry.run
11+
12+
13+
def main(args):
    """Run the `run` stage, then `collate` the output it reports.

    `args` is the parsed command-line namespace; its `suites_dir`
    attribute is overwritten with the location returned by the run stage
    before collate is invoked.
    """
    outcome = benchcomp.entry.run.main(args)

    # Collate reads from wherever the run stage says its symlink lives
    args.suites_dir = outcome.out_prefix / outcome.out_symlink
    benchcomp.entry.collate.main(args)

0 commit comments

Comments
 (0)