From 6731d0a5ed818121ec98bf4f3cd99e5a0455aef3 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 8 Apr 2021 22:55:51 -0700 Subject: [PATCH 01/45] Add script to find "negative blocks" Blocks whose node examined count increased --- util/misc/find-negative-nodes-examined.py | 35 +++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 util/misc/find-negative-nodes-examined.py diff --git a/util/misc/find-negative-nodes-examined.py b/util/misc/find-negative-nodes-examined.py new file mode 100755 index 00000000..e5d823f9 --- /dev/null +++ b/util/misc/find-negative-nodes-examined.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +from typing import * +import argparse +import analyze +from analyze import Block, Logs + + +def nodes_examined(block: Block) -> int: + return block.single('NodeExamineCount')['num_nodes'] if 'NodeExamineCount' in block else 0 + + +def is_negative_nodes_examined(first: Block, second: Block) -> bool: + return nodes_examined(first) < nodes_examined(second) + + +def find_negative_nodes_examined(first: Logs, second: Logs) -> List[Tuple[Block, Block, int, int]]: + return [ + (f, s, nodes_examined(f), nodes_examined(s)) for f, s in zip(first, second) + if is_negative_nodes_examined(f, s) + ] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Finds all blocks for which nodes_examined(first_logs) < nodes_examined(second_logs)') + parser.add_argument('first', help='The first logs') + parser.add_argument('second', help='The second logs') + args = analyze.parse_args(parser, 'first', 'second') + + negatives = find_negative_nodes_examined(args.first, args.second) + negatives = sorted(negatives, key=lambda x: x[3] - x[2]) + for fblock, sblock, num_f, num_s in negatives: + print( + f"{fblock.info['benchmark']} {fblock.name} : {num_f} - {num_s} = {num_f - num_s}") From ef9713ec9605af3883a374e21c201ea3a70b4b78 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 8 Apr 2021 23:17:24 -0700 Subject: [PATCH 
02/45] Add script to compute block stats --- util/analyze/__init__.py | 3 +- util/analyze/lib/block_stats.py | 77 +++++++++++++++++++ .../lib/find_negative_nodes_examined.py} | 0 util/analyze/{_utils.py => utils.py} | 7 ++ 4 files changed, 86 insertions(+), 1 deletion(-) create mode 100755 util/analyze/lib/block_stats.py rename util/{misc/find-negative-nodes-examined.py => analyze/lib/find_negative_nodes_examined.py} (100%) rename util/analyze/{_utils.py => utils.py} (92%) diff --git a/util/analyze/__init__.py b/util/analyze/__init__.py index 63b66c2b..f85cd924 100644 --- a/util/analyze/__init__.py +++ b/util/analyze/__init__.py @@ -1,4 +1,5 @@ from ._types import Logs, Benchmark, Block from ._main import parse_args from .imports import import_cpu2006, import_plaidml, import_shoc, import_utils -from ._utils import * +from . import utils +from .utils import foreach_bench diff --git a/util/analyze/lib/block_stats.py b/util/analyze/lib/block_stats.py new file mode 100755 index 00000000..ee6d03e5 --- /dev/null +++ b/util/analyze/lib/block_stats.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +from typing import * +import argparse +import analyze +from analyze import Block, Logs, utils + + +def is_enumerated(blk: Block) -> bool: + return 'Enumerating' in blk + + +def is_optimal(blk: Block) -> bool: + return 'DagSolvedOptimally' in blk + + +def is_timed_out(blk: Block) -> bool: + return 'DagTimedOut' in blk + + +def cost_improvement_for_blk(blk: Block) -> int: + if 'DagSolvedOptimally' in blk: + return blk.single('DagSolvedOptimally')['cost_improvement'] + elif 'DagTimedOut' in blk: + return blk.single('DagTimedOut')['cost_improvement'] + else: + return 0 + + +def is_improved(blk: Block) -> bool: + return cost_improvement_for_blk(blk) > 0 + + +def nodes_examined_for_blk(blk: Block) -> int: + return blk.single('NodeExamineCount')['num_nodes'] if 'NodeExamineCount' in blk else 0 + + +def num_blocks(logs: Logs) -> int: + return sum(len(bench.blocks) for bench in 
logs.benchmarks) + + +def num_enumerated(logs: Logs) -> int: + return sum(1 for blk in logs if is_enumerated(blk)) + + +def nodes_examined(logs: Logs) -> int: + return sum(nodes_examined_for_blk(blk) for blk in logs) + + +def compute_block_stats(logs: Logs): + return { + 'num blocks': num_blocks(logs), + 'num blocks enumerated': num_enumerated(logs), + 'num optimal and improved': utils.count(blk for blk in logs if is_optimal(blk) and is_improved(blk)), + 'num optimal and not improved': utils.count(blk for blk in logs if is_optimal(blk) and not is_improved(blk)), + 'num not optimal and improved': utils.count(blk for blk in logs if not is_optimal(blk) and is_improved(blk)), + 'num not optimal and not improved': utils.count(blk for blk in logs if not is_optimal(blk) and not is_improved(blk)), + 'nodes examined': nodes_examined(logs), + } + + +if __name__ == '__main__': + import sys + import csv + + parser = argparse.ArgumentParser( + description='Computes the block stats for the logs') + parser.add_argument('logs', help='The logs to analyze') + args = analyze.parse_args(parser, 'logs') + + results = utils.foreach_bench(compute_block_stats, args.logs) + + writer = csv.DictWriter(sys.stdout, + fieldnames=['Benchmark'] + list(results['Total'].keys())) + writer.writeheader() + for bench, bench_res in results.items(): + writer.writerow({'Benchmark': bench, **bench_res}) diff --git a/util/misc/find-negative-nodes-examined.py b/util/analyze/lib/find_negative_nodes_examined.py similarity index 100% rename from util/misc/find-negative-nodes-examined.py rename to util/analyze/lib/find_negative_nodes_examined.py diff --git a/util/analyze/_utils.py b/util/analyze/utils.py similarity index 92% rename from util/analyze/_utils.py rename to util/analyze/utils.py index 0ff6615a..5b3e9daa 100644 --- a/util/analyze/_utils.py +++ b/util/analyze/utils.py @@ -37,3 +37,10 @@ def foreach_bench(analysis_f, *logs, combine=None): 'Total': total, **bench_stats, } + + +def count(iter): + try: + 
return len(iter) + except: + return sum(1 for _ in iter) From 9c26a8d108d177cfdf0ce06cf11e29fe32ae89ba Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 8 Apr 2021 23:23:04 -0700 Subject: [PATCH 03/45] Improve script to find negative blocks --- util/analyze/lib/find_negative_nodes_examined.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/util/analyze/lib/find_negative_nodes_examined.py b/util/analyze/lib/find_negative_nodes_examined.py index e5d823f9..c72957ee 100755 --- a/util/analyze/lib/find_negative_nodes_examined.py +++ b/util/analyze/lib/find_negative_nodes_examined.py @@ -14,10 +14,12 @@ def is_negative_nodes_examined(first: Block, second: Block) -> bool: return nodes_examined(first) < nodes_examined(second) -def find_negative_nodes_examined(first: Logs, second: Logs) -> List[Tuple[Block, Block, int, int]]: +def find_negative_nodes_examined(first: Logs, second: Logs, percent_threshold: float = 0, absolute_threshold: float = 0) -> List[Tuple[Block, Block, int, int]]: return [ (f, s, nodes_examined(f), nodes_examined(s)) for f, s in zip(first, second) if is_negative_nodes_examined(f, s) + and nodes_examined(s) - nodes_examined(f) < absolute_threshold + and (nodes_examined(s) - nodes_examined(f)) / nodes_examined(f) * 100 < percent_threshold ] @@ -26,9 +28,14 @@ def find_negative_nodes_examined(first: Logs, second: Logs) -> List[Tuple[Block, description='Finds all blocks for which nodes_examined(first_logs) < nodes_examined(second_logs)') parser.add_argument('first', help='The first logs') parser.add_argument('second', help='The second logs') + parser.add_argument('-%', '--percent-threshold', type=float, default=0, + help='Ignore any blocks with a %%-difference < threshold') + parser.add_argument('-$', '--absolute-threshold', type=float, default=0, + help='Ignore any blocks with a difference < threshold') args = analyze.parse_args(parser, 'first', 'second') - negatives = find_negative_nodes_examined(args.first, 
args.second) + negatives = find_negative_nodes_examined( + args.first, args.second, percent_threshold=args.percent_threshold, absolute_threshold=args.absolute_threshold) negatives = sorted(negatives, key=lambda x: x[3] - x[2]) for fblock, sblock, num_f, num_s in negatives: print( From 5939232a6bde38df6ee3a49b05d160fa578981bb Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Fri, 9 Apr 2021 00:19:50 -0700 Subject: [PATCH 04/45] Transfer gt-analysis script --- util/analyze/lib/compile_times.py | 4 +- util/analyze/utils.py | 33 ++++++++ util/gt_analysis/__init__.py | 2 + util/gt_analysis/gt_cmp.py | 120 ++++++++++++++++++++++++++++ util/gt_analysis/gt_cmp_opt_only.py | 33 ++++++++ 5 files changed, 190 insertions(+), 2 deletions(-) create mode 100644 util/gt_analysis/__init__.py create mode 100755 util/gt_analysis/gt_cmp.py create mode 100755 util/gt_analysis/gt_cmp_opt_only.py diff --git a/util/analyze/lib/compile_times.py b/util/analyze/lib/compile_times.py index 312a96ab..b2c20d2c 100755 --- a/util/analyze/lib/compile_times.py +++ b/util/analyze/lib/compile_times.py @@ -15,7 +15,7 @@ def _block_time(block: Block): return end - start -def instruction_scheduling_time(logs): +def sched_time(logs): return sum(_block_time(blk) for blk in logs) @@ -35,7 +35,7 @@ def total_compile_time_seconds(logs): parser.add_argument('logs', help='The logs to analyze') args = analyze.parse_args(parser, 'logs') - fn = total_compile_time_seconds if args.variant == 'total' else instruction_scheduling_time + fn = total_compile_time_seconds if args.variant == 'total' else sched_time results = foreach_bench(fn, args.logs, combine=sum) writer = csv.DictWriter(sys.stdout, fieldnames=results.keys()) writer.writeheader() diff --git a/util/analyze/utils.py b/util/analyze/utils.py index 5b3e9daa..31576f79 100644 --- a/util/analyze/utils.py +++ b/util/analyze/utils.py @@ -44,3 +44,36 @@ def count(iter): return len(iter) except: return sum(1 for _ in iter) + + +def zipped_keep_blocks_if(*logs, 
pred): + ''' + Given: + a: [blk1, blk2, blk3, ...] # of type Logs + b: [blk1, blk2, blk3, ...] # of type Logs + c: [blk1, blk2, blk3, ...] # of type Logs + ... + + Returns: + [ + (a.blk1, b.blk1, c.blk1, ...) if pred(a.blk1, b.blk1, c.blk1, ...) + ... + ] + + Also supports pred(b), in which case it's all(pred(b) for b in (a.blk1, b.blk1, ...)) + ''' + + try: + all_p = set(blks[0].uniqueid() for blks in zip(*logs) if pred(*blks)) + except TypeError: + all_p = set(blks[0].uniqueid() + for blks in zip(*logs) if all(pred(b) for b in blks)) + + filtered = tuple(log.keep_blocks_if( + lambda blk: blk.uniqueid() in all_p) for log in logs) + + return filtered + + +def sum_stat_for_all(stat, logs: Logs) -> int: + return sum(stat(blk) for blk in logs) diff --git a/util/gt_analysis/__init__.py b/util/gt_analysis/__init__.py new file mode 100644 index 00000000..115122e5 --- /dev/null +++ b/util/gt_analysis/__init__.py @@ -0,0 +1,2 @@ +from . import gt_cmp +from . import gt_cmp_opt_only diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py new file mode 100755 index 00000000..31dc313c --- /dev/null +++ b/util/gt_analysis/gt_cmp.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 + +import argparse + +import analyze +from analyze import Block, Logs, utils +from analyze.lib import block_stats, compile_times + +sched_time = compile_times.sched_time + + +def blocks_enumerated_optimally(blocks): + return [blk for blk in blocks if 'DagSolvedOptimally' in blk or 'HeuristicScheduleOptimal' in blk] + + +def rp_ilp_gt_elapsed_for_blk(blk: Block) -> int: + if 'GraphTransOccupancyPreservingILPNodeSuperiority' not in blk: + return 0 + return blk.single('GraphTransOccupancyPreservingILPNodeSuperiorityFinished')['time'] \ + - blk.single('GraphTransOccupancyPreservingILPNodeSuperiority')['time'] + + +def rp_only_gt_elapsed_for_blk(blk: Block) -> int: + if 'GraphTransRPNodeSuperiority' not in blk: + return 0 + return blk.single('GraphTransRPNodeSuperiorityFinished')['time'] \ + - 
blk.single('GraphTransRPNodeSuperiority')['time'] + + +def ilp_only_gt_elapsed_for_blk(blk: Block) -> int: + if 'GraphTransILPNodeSuperiority' not in blk: + return 0 + return blk.single('GraphTransILPNodeSuperiorityFinished')['time'] \ + - blk.single('GraphTransILPNodeSuperiority')['time'] + + +def raw_gt_elapsed_for_blk(blk: Block) -> int: + return rp_ilp_gt_elapsed_for_blk(blk) \ + + rp_only_gt_elapsed_for_blk(blk) \ + + ilp_only_gt_elapsed_for_blk(blk) + + +def total_gt_elapsed_for_blk(blk: Block) -> int: + if 'GraphTransformationsStart' not in blk: + return 0 + return blk.single('GraphTransformationsFinished')['time'] \ + - blk.single('GraphTransformationsStart')['time'] + + +def elapsed_before_enumeration_for_blk(blk: Block) -> int: + assert 'CostLowerBound' in blk + return blk.single('CostLowerBound')['time'] + + +def enum_time_for_blk(blk: Block) -> int: + if 'DagSolvedOptimally' not in blk: + return 0 + return blk.single('DagSolvedOptimally')['time'] - blk['Enumerating'][0]['time'] + + +def cost_for_blk(blk: Block) -> int: + return blk.single('BestResult')['cost'] + blk.single('CostLowerBound')['cost'] + + +def is_improved(before: Block, after: Block): + return cost_for_blk(before) > cost_for_blk(after) + + +def compute_stats(nogt: Logs, gt: Logs): + nogt_enum, gt_enum = utils.zipped_keep_blocks_if( + nogt, gt, pred=block_stats.is_enumerated) + + result = { + 'nogt sched time': sched_time(nogt), + 'gt sched time': sched_time(gt), + 'nogt enum time': utils.sum_stat_for_all(enum_time_for_blk, nogt_enum), + 'gt enum time': utils.sum_stat_for_all(enum_time_for_blk, gt_enum), + 'nogt nodes examined': block_stats.nodes_examined(nogt_enum), + 'gt nodes examined': block_stats.nodes_examined(gt_enum), + 'nogt num enumerated': block_stats.num_enumerated(nogt_enum), + 'gt num enumerated': block_stats.num_enumerated(gt_enum), + + 'nogt timeout unimproved': utils.count(blk for blk in nogt_enum + if block_stats.is_timed_out(blk) + and not block_stats.is_improved(blk)), + 
'gt timeout unimproved': utils.count(blk for blk in gt_enum + if block_stats.is_timed_out(blk) + and not block_stats.is_improved(blk)), + 'nogt timeout improved': utils.count(blk for blk in nogt_enum + if block_stats.is_timed_out(blk) + and block_stats.is_improved(blk)), + 'gt timeout improved': utils.count(blk for blk in gt_enum + if block_stats.is_timed_out(blk) + and block_stats.is_improved(blk)), + + 'total gt time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), + + 'nogt sched time (enum only)': sched_time(nogt_enum), + 'gt sched time (enum only)': sched_time(gt_enum), + } + + return result + + +if __name__ == "__main__": + import sys + import csv + + parser = argparse.ArgumentParser() + parser.add_argument('nogt') + parser.add_argument('gt') + args = analyze.parse_args(parser, 'nogt', 'gt') + + results = utils.foreach_bench(compute_stats, args.nogt, args.gt) + + writer = csv.DictWriter(sys.stdout, + fieldnames=['Benchmark'] + list(results['Total'].keys())) + writer.writeheader() + for bench, bench_res in results.items(): + writer.writerow({'Benchmark': bench, **bench_res}) diff --git a/util/gt_analysis/gt_cmp_opt_only.py b/util/gt_analysis/gt_cmp_opt_only.py new file mode 100755 index 00000000..107f0bce --- /dev/null +++ b/util/gt_analysis/gt_cmp_opt_only.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 + +import argparse + +import analyze +from analyze import Logs, utils +from analyze.lib import block_stats +from gt_analysis import gt_cmp + +is_optimal = block_stats.is_optimal + + +def compute_stats(nogt: Logs, gt: Logs): + nogt, gt = utils.zipped_keep_blocks_if(nogt, gt, pred=is_optimal) + return gt_cmp.compute_stats(nogt, gt) + + +if __name__ == "__main__": + import sys + import csv + + parser = argparse.ArgumentParser() + parser.add_argument('nogt') + parser.add_argument('gt') + args = analyze.parse_args(parser, 'nogt', 'gt') + + results = utils.foreach_bench(compute_stats, args.nogt, args.gt) + + writer = csv.DictWriter(sys.stdout, + 
fieldnames=['Benchmark'] + list(results['Total'].keys())) + writer.writeheader() + for bench, bench_res in results.items(): + writer.writerow({'Benchmark': bench, **bench_res}) From 425714cb46d36357997566dc3e2e33aec9715ed2 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 15 Apr 2021 21:41:49 -0700 Subject: [PATCH 05/45] Some more utilities --- util/analyze/_main.py | 15 +++++++++++---- util/analyze/_types.py | 26 ++++++++++++++++++++++++++ util/misc/load_logs.py | 12 ++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 util/misc/load_logs.py diff --git a/util/analyze/_main.py b/util/analyze/_main.py index e34f92ed..d4c2d7b4 100644 --- a/util/analyze/_main.py +++ b/util/analyze/_main.py @@ -80,11 +80,18 @@ def parse_args(parser: argparse.ArgumentParser, *names, args=None): args_dict = vars(args) + def parse_input(x): + if isinstance(x, str): + result = parser(x) + if blk_filter is not True: + result = result.keep_blocks_if(blk_filter) + return result + else: + assert isinstance(x, list) + return [parse_input(l) for l in x] + # Go through the logs inputs and parse them. 
for name in names: - result = parser(args_dict[name]) - if blk_filter is not True: - result = result.keep_blocks_if(blk_filter) - args_dict[name] = result + args_dict[name] = parse_input(args_dict[name]) return args diff --git a/util/analyze/_types.py b/util/analyze/_types.py index 8151bdc6..5575af46 100644 --- a/util/analyze/_types.py +++ b/util/analyze/_types.py @@ -48,6 +48,16 @@ def __repr__(self): def keep_blocks_if(self, p): return Logs([bench.keep_blocks_if(p) for bench in self.benchmarks]) + def find_equiv(self, blk): + uid = blk.uniqueid() + return [b for b in self.benchmark(blk.info['benchmark']) if b.uniqueid() == uid] + + def find_block(self, name, benchmark=None): + search = self + if benchmark is not None: + search = self.benchmark(benchmark) + return [b for b in search if b.name == name] + class Benchmark: ''' @@ -77,6 +87,16 @@ def __repr__(self): def keep_blocks_if(self, p): return Benchmark(self.info, [blk for blk in self.blocks if p(blk)]) + def find_equiv(self, blk): + uid = blk.uniqueid() + return [b for b in self if b.uniqueid() == uid] + + def find_block(self, name, benchmark=None): + if benchmark is not None: + if benchmark != self.name: + return [] + return [b for b in self if b.name == name] + class Block: ''' @@ -97,6 +117,9 @@ def __init__(self, info, raw_log, events): self.raw_log = raw_log self.events = events + if 'PassFinished' in self: + self.info['pass'] = self.single('PassFinished')['num'] + def single(self, event_name): ''' Gets an event with the specified name, requiring exactly one match @@ -132,3 +155,6 @@ def __repr__(self): def uniqueid(self): return frozenset(self.info.items()) + + def dump(self): + print(self.raw_log) diff --git a/util/misc/load_logs.py b/util/misc/load_logs.py new file mode 100644 index 00000000..199e6229 --- /dev/null +++ b/util/misc/load_logs.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 + +# Intended to be used with python -i to load the logs to then be worked on in the REPL + +import argparse +import 
analyze + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('logs', nargs='+', help='The logs to analyze') + args = analyze.parse_args(parser, 'logs') + logs = args.logs From 059ec661a7979a38b57d9d90c25f95c83d43b359 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Wed, 28 Apr 2021 22:03:27 -0700 Subject: [PATCH 06/45] Fix some stats to be consistent with what they were before --- util/analyze/lib/block_stats.py | 10 +++++----- util/gt_analysis/gt_cmp.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/util/analyze/lib/block_stats.py b/util/analyze/lib/block_stats.py index ee6d03e5..096e9a13 100755 --- a/util/analyze/lib/block_stats.py +++ b/util/analyze/lib/block_stats.py @@ -11,7 +11,7 @@ def is_enumerated(blk: Block) -> bool: def is_optimal(blk: Block) -> bool: - return 'DagSolvedOptimally' in blk + return 'DagSolvedOptimally' in blk or 'HeuristicScheduleOptimal' in blk def is_timed_out(blk: Block) -> bool: @@ -51,10 +51,10 @@ def compute_block_stats(logs: Logs): return { 'num blocks': num_blocks(logs), 'num blocks enumerated': num_enumerated(logs), - 'num optimal and improved': utils.count(blk for blk in logs if is_optimal(blk) and is_improved(blk)), - 'num optimal and not improved': utils.count(blk for blk in logs if is_optimal(blk) and not is_improved(blk)), - 'num not optimal and improved': utils.count(blk for blk in logs if not is_optimal(blk) and is_improved(blk)), - 'num not optimal and not improved': utils.count(blk for blk in logs if not is_optimal(blk) and not is_improved(blk)), + 'num optimal and improved': utils.count(blk for blk in logs if is_optimal(blk) and is_improved(blk) and is_enumerated(blk)), + 'num optimal and not improved': utils.count(blk for blk in logs if is_optimal(blk) and not is_improved(blk) and is_enumerated(blk)), + 'num not optimal and improved': utils.count(blk for blk in logs if not is_optimal(blk) and is_improved(blk) and is_enumerated(blk)), + 'num not optimal and not 
improved': utils.count(blk for blk in logs if not is_optimal(blk) and not is_improved(blk) and is_enumerated(blk)), 'nodes examined': nodes_examined(logs), } diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 31dc313c..e9c07514 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -71,6 +71,7 @@ def compute_stats(nogt: Logs, gt: Logs): nogt, gt, pred=block_stats.is_enumerated) result = { + 'num regions': utils.count(nogt), 'nogt sched time': sched_time(nogt), 'gt sched time': sched_time(gt), 'nogt enum time': utils.sum_stat_for_all(enum_time_for_blk, nogt_enum), From 97e82dab522f8fd62b86bb8796aedc612e63bb4b Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 29 Apr 2021 17:19:41 -0700 Subject: [PATCH 07/45] Improve load_logs script Drops into an interpreter even if not run with python -i --- util/misc/load_logs.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) mode change 100644 => 100755 util/misc/load_logs.py diff --git a/util/misc/load_logs.py b/util/misc/load_logs.py old mode 100644 new mode 100755 index 199e6229..bf4420d1 --- a/util/misc/load_logs.py +++ b/util/misc/load_logs.py @@ -1,12 +1,19 @@ #!/usr/bin/env python3 -# Intended to be used with python -i to load the logs to then be worked on in the REPL - import argparse +import sys import analyze +__INTERACTIVE = bool(getattr(sys, 'ps1', sys.flags.interactive)) + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('logs', nargs='+', help='The logs to analyze') args = analyze.parse_args(parser, 'logs') logs = args.logs + + if __INTERACTIVE: + print('Parsed logs into variable `logs`') + else: + import code + code.interact(banner='Parsed logs into variable `logs`', exitmsg='', local={'logs': logs}) From f1c816aef10d266316f20336051d6080d7594dd5 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 29 Apr 2021 17:53:41 -0700 Subject: [PATCH 08/45] Improve phrasing --- util/gt_analysis/gt_cmp.py | 54 
+++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index e9c07514..da2b8105 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -71,33 +71,33 @@ def compute_stats(nogt: Logs, gt: Logs): nogt, gt, pred=block_stats.is_enumerated) result = { - 'num regions': utils.count(nogt), - 'nogt sched time': sched_time(nogt), - 'gt sched time': sched_time(gt), - 'nogt enum time': utils.sum_stat_for_all(enum_time_for_blk, nogt_enum), - 'gt enum time': utils.sum_stat_for_all(enum_time_for_blk, gt_enum), - 'nogt nodes examined': block_stats.nodes_examined(nogt_enum), - 'gt nodes examined': block_stats.nodes_examined(gt_enum), - 'nogt num enumerated': block_stats.num_enumerated(nogt_enum), - 'gt num enumerated': block_stats.num_enumerated(gt_enum), - - 'nogt timeout unimproved': utils.count(blk for blk in nogt_enum - if block_stats.is_timed_out(blk) - and not block_stats.is_improved(blk)), - 'gt timeout unimproved': utils.count(blk for blk in gt_enum - if block_stats.is_timed_out(blk) - and not block_stats.is_improved(blk)), - 'nogt timeout improved': utils.count(blk for blk in nogt_enum - if block_stats.is_timed_out(blk) - and block_stats.is_improved(blk)), - 'gt timeout improved': utils.count(blk for blk in gt_enum - if block_stats.is_timed_out(blk) - and block_stats.is_improved(blk)), - - 'total gt time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), - - 'nogt sched time (enum only)': sched_time(nogt_enum), - 'gt sched time (enum only)': sched_time(gt_enum), + 'Num Blocks': utils.count(nogt), + 'Sched Time (No GT)': sched_time(nogt), + 'Sched Time (GT)': sched_time(gt), + 'Enum Time (No GT)': utils.sum_stat_for_all(enum_time_for_blk, nogt_enum), + 'Enum Time (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt_enum), + 'Nodes Examined (No GT)': block_stats.nodes_examined(nogt_enum), + 'Nodes Examined (GT)': block_stats.nodes_examined(gt_enum), 
+ 'Num Blocks Enum (No GT)': block_stats.num_enumerated(nogt_enum), + 'Num Blocks Enum (GT)': block_stats.num_enumerated(gt_enum), + + 'Num Timeout Unimproved (No GT)': utils.count(blk for blk in nogt_enum + if block_stats.is_timed_out(blk) + and not block_stats.is_improved(blk)), + 'Num Timeout Unimproved (GT)': utils.count(blk for blk in gt_enum + if block_stats.is_timed_out(blk) + and not block_stats.is_improved(blk)), + 'Num Timeout Improved (No GT)': utils.count(blk for blk in nogt_enum + if block_stats.is_timed_out(blk) + and block_stats.is_improved(blk)), + 'Num Timeout Improved (GT)': utils.count(blk for blk in gt_enum + if block_stats.is_timed_out(blk) + and block_stats.is_improved(blk)), + + 'Total GT Time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), + + 'Sched Time (enum only) (No GT)': sched_time(nogt_enum), + 'Sched Time (enum only) (GT)': sched_time(gt_enum), } return result From ebdacf7cab0f69683a97dbbfeb86e2ef441248f1 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Mon, 31 May 2021 18:58:48 -0600 Subject: [PATCH 09/45] Add metric for block cost --- util/analyze/lib/block_stats.py | 16 ++++++++++++++++ util/gt_analysis/gt_cmp.py | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/util/analyze/lib/block_stats.py b/util/analyze/lib/block_stats.py index 096e9a13..b1430b68 100755 --- a/util/analyze/lib/block_stats.py +++ b/util/analyze/lib/block_stats.py @@ -18,6 +18,22 @@ def is_timed_out(blk: Block) -> bool: return 'DagTimedOut' in blk +def block_cost_lower_bound(blk: Block) -> int: + return blk.single('CostLowerBound')['cost'] + + +def block_relative_cost(blk: Block) -> int: + return blk.single('BestResult')['cost'] + + +def block_best_length(blk: Block) -> int: + return blk.single('BestResult')['length'] + + +def block_cost(blk: Block) -> int: + return block_cost_lower_bound(blk) + block_relative_cost(blk) + + def cost_improvement_for_blk(blk: Block) -> int: if 'DagSolvedOptimally' in blk: return 
blk.single('DagSolvedOptimally')['cost_improvement'] diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index da2b8105..9428e99c 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -72,6 +72,10 @@ def compute_stats(nogt: Logs, gt: Logs): result = { 'Num Blocks': utils.count(nogt), + 'Block Cost (No GT)': utils.sum_stat_for_all(block_stats.block_cost, nogt_enum), + 'Block Cost (GT)': utils.sum_stat_for_all(block_stats.block_cost, gt_enum), + 'Block Cost - Relative (No GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, nogt_enum), + 'Block Cost - Relative (GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, gt_enum), 'Sched Time (No GT)': sched_time(nogt), 'Sched Time (GT)': sched_time(gt), 'Enum Time (No GT)': utils.sum_stat_for_all(enum_time_for_blk, nogt_enum), From 848ef7375d2b3d3d0a2f0ec9d2edfb6c7e57451e Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Mon, 31 May 2021 19:06:34 -0600 Subject: [PATCH 10/45] Remove irrelevant metrics --- util/analyze/_types.py | 6 ++++++ util/gt_analysis/gt_cmp.py | 44 +++++++++++++++++--------------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/util/analyze/_types.py b/util/analyze/_types.py index 5575af46..01920d13 100644 --- a/util/analyze/_types.py +++ b/util/analyze/_types.py @@ -41,6 +41,9 @@ def __iter__(self): for bench in self.benchmarks: yield from bench.blocks + def __len__(self): + return sum(len(bench) for bench in self.benchmarks) + def __repr__(self): benchmarks = ','.join(b.name for b in self.benchmarks) return f'' @@ -77,6 +80,9 @@ def __init__(self, info, blocks): def __iter__(self): return iter(self.blocks) + def __len__(self): + return len(self.blocks) + @property def benchmarks(self): return (self,) diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 9428e99c..139dcd8a 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -67,41 +67,37 @@ def is_improved(before: Block, after: 
Block): def compute_stats(nogt: Logs, gt: Logs): - nogt_enum, gt_enum = utils.zipped_keep_blocks_if( + TOTAL_BLOCKS = utils.count(nogt) + + nogt, gt = utils.zipped_keep_blocks_if( nogt, gt, pred=block_stats.is_enumerated) result = { - 'Num Blocks': utils.count(nogt), - 'Block Cost (No GT)': utils.sum_stat_for_all(block_stats.block_cost, nogt_enum), - 'Block Cost (GT)': utils.sum_stat_for_all(block_stats.block_cost, gt_enum), - 'Block Cost - Relative (No GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, nogt_enum), - 'Block Cost - Relative (GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, gt_enum), - 'Sched Time (No GT)': sched_time(nogt), - 'Sched Time (GT)': sched_time(gt), - 'Enum Time (No GT)': utils.sum_stat_for_all(enum_time_for_blk, nogt_enum), - 'Enum Time (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt_enum), - 'Nodes Examined (No GT)': block_stats.nodes_examined(nogt_enum), - 'Nodes Examined (GT)': block_stats.nodes_examined(gt_enum), - 'Num Blocks Enum (No GT)': block_stats.num_enumerated(nogt_enum), - 'Num Blocks Enum (GT)': block_stats.num_enumerated(gt_enum), - - 'Num Timeout Unimproved (No GT)': utils.count(blk for blk in nogt_enum + 'Total Blocks in Benchsuite': TOTAL_BLOCKS, + 'Num Blocks with GT applied': utils.count(nogt), + 'Block Cost (No GT)': utils.sum_stat_for_all(block_stats.block_cost, nogt), + 'Block Cost (GT)': utils.sum_stat_for_all(block_stats.block_cost, gt), + 'Block Cost - Relative (No GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, nogt), + 'Block Cost - Relative (GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, gt), + 'Total Sched Time (No GT)': sched_time(nogt), + 'Total Sched Time (GT)': sched_time(gt), + 'Enum Time (No GT)': utils.sum_stat_for_all(enum_time_for_blk, nogt), + 'Enum Time (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt), + + 'Total GT Time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), + + 'Num Timeout Unimproved (No GT)': utils.count(blk for blk 
in nogt if block_stats.is_timed_out(blk) and not block_stats.is_improved(blk)), - 'Num Timeout Unimproved (GT)': utils.count(blk for blk in gt_enum + 'Num Timeout Unimproved (GT)': utils.count(blk for blk in gt if block_stats.is_timed_out(blk) and not block_stats.is_improved(blk)), - 'Num Timeout Improved (No GT)': utils.count(blk for blk in nogt_enum + 'Num Timeout Improved (No GT)': utils.count(blk for blk in nogt if block_stats.is_timed_out(blk) and block_stats.is_improved(blk)), - 'Num Timeout Improved (GT)': utils.count(blk for blk in gt_enum + 'Num Timeout Improved (GT)': utils.count(blk for blk in gt if block_stats.is_timed_out(blk) and block_stats.is_improved(blk)), - - 'Total GT Time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), - - 'Sched Time (enum only) (No GT)': sched_time(nogt_enum), - 'Sched Time (enum only) (GT)': sched_time(gt_enum), } return result From 6b45b923c7ef287b27f4a3c8f49095ad1197e790 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 15 Jun 2021 21:13:36 -0700 Subject: [PATCH 11/45] [scripts] Allow multiple LBs & HeuristicResults --- util/analyze/lib/block_stats.py | 2 +- util/gt_analysis/gt_cmp.py | 4 +- util/misc/validation-test.py | 243 ++++++++++++++++---------------- 3 files changed, 127 insertions(+), 122 deletions(-) diff --git a/util/analyze/lib/block_stats.py b/util/analyze/lib/block_stats.py index b1430b68..0568ebd9 100755 --- a/util/analyze/lib/block_stats.py +++ b/util/analyze/lib/block_stats.py @@ -19,7 +19,7 @@ def is_timed_out(blk: Block) -> bool: def block_cost_lower_bound(blk: Block) -> int: - return blk.single('CostLowerBound')['cost'] + return blk['CostLowerBound'][-1]['cost'] def block_relative_cost(blk: Block) -> int: diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 139dcd8a..9eba5d94 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -49,7 +49,7 @@ def total_gt_elapsed_for_blk(blk: Block) -> int: def elapsed_before_enumeration_for_blk(blk: Block) -> 
int: assert 'CostLowerBound' in blk - return blk.single('CostLowerBound')['time'] + return blk['CostLowerBound'][-1]['time'] def enum_time_for_blk(blk: Block) -> int: @@ -59,7 +59,7 @@ def enum_time_for_blk(blk: Block) -> int: def cost_for_blk(blk: Block) -> int: - return blk.single('BestResult')['cost'] + blk.single('CostLowerBound')['cost'] + return blk.single('BestResult')['cost'] + blk['CostLowerBound'][-1]['cost'] def is_improved(before: Block, after: Block): diff --git a/util/misc/validation-test.py b/util/misc/validation-test.py index b4e15f79..2373c104 100755 --- a/util/misc/validation-test.py +++ b/util/misc/validation-test.py @@ -7,37 +7,36 @@ import os, sys import itertools +from typing import List +import argparse -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from readlogs import * +import analyze +from analyze import Logs # Explain this many of the blocks missing a lower bound MISSING_LOWER_BOUND_DUMP_COUNT = 3 MISSING_LOWER_BOUND_DUMP_LINES = 10 -dags1 = {} -dags2 = {} - -def dags_info(logtext): +def dags_info(logs: Logs): dags = {} - unfiltered = [keep_only_singular_events(block) for block in parse_blocks(logtext)] - blocks = [block for block in unfiltered if 'CostLowerBound' in block] + blocks = list(logs) + + no_lb = [block for block in blocks if 'CostLowerBound' not in block] - if len(blocks) != len(unfiltered): + if no_lb: print('WARNING: Missing a logged lower bound for {missing}/{total} blocks.' 
- .format(missing=len(unfiltered) - len(blocks), total=len(unfiltered)), file=sys.stderr) + .format(missing=len(no_lb), total=len(blocks)), file=sys.stderr) - missing = set(unfiltered) - set(blocks) - trimmed = ('\n'.join(block.splitlines()[:MISSING_LOWER_BOUND_DUMP_LINES]) for block in missing) + trimmed = ('\n'.join(block.raw_log.splitlines()[:MISSING_LOWER_BOUND_DUMP_LINES]) for block in no_lb) for i, block in enumerate(itertools.islice(trimmed, MISSING_LOWER_BOUND_DUMP_COUNT)): print('WARNING: block {} missing lower-bound:\n{}\n...'.format(i, block), file=sys.stderr) for block in blocks: - lowerBound = block['CostLowerBound']['cost'] - blockInfo = block['BestResult'] + lowerBound = block['CostLowerBound'][-1]['cost'] + blockInfo = block.single('BestResult') dagName = blockInfo['name'] dags[dagName] = { 'lowerBound': lowerBound, @@ -50,108 +49,114 @@ def dags_info(logtext): return dags -with open(str(sys.argv[1])) as logfile1: - dags1 = dags_info(logfile1.read()) - -with open(str(sys.argv[2])) as logfile2: - dags2 = dags_info(logfile2.read()) - -numDagsLog1 = len(dags1) -numDagsLog2 = len(dags2) -# The number of blocks that are optimal in both logs. -optimalInBoth = 0 -# The number of blocks that are only optimal in log 1. -optimalLog1 = 0 -# The number of blocks that are only optimal in log 2. -optimalLog2 = 0 -# Mismatches where blocks are optimal in both logs but have different costs. -misNonEqual = 0 -# Mismatches where block is optimal in log 1 but it has a higher cost than the non-optimal block in log 2. -misBlk1Opt = 0 -# Mismatches where block is optimal in log 2 but it has a higher cost than the non-optimal block in log 1. -misBlk2Opt = 0 -# The quantity of blocks with the largest mismatches to print. -numLarMisPrt = 10 -# The quantity of mismatched blocks with the shortest length to print. -numSmlBlkPrt = 50 -# Dictionary with the sizes of the mismatches for each mismatched block and the size of the block. 
-mismatches = {} - - - -if numDagsLog1 != numDagsLog2: - print('Error: Different number of dags in each log file.') - -for dagName in dags1: - if dagName not in dags2: - print('Error: Could not find ' + dagName + ' in the second log file.') - continue - - dag1 = dags1[dagName] - dag2 = dags2[dagName] - if dag1['isOptimal'] and dag2['isOptimal']: - optimalInBoth+=1 - if dag1['cost'] != dag2['cost']: - # There was a mismatch where blocks are optimal in both logs but have different costs - misNonEqual += 1 - mismatches[dagName] = {} - mismatches[dagName]['length'] = dag1['length'] - mismatches[dagName]['misSize'] = abs(dag1['cost'] - dag2['cost']) - #print('Mismatch for dag ' + dagName + ' (Both optimal with non-equal cost)') - - elif dag1['isOptimal']: - optimalLog1+=1 - if dag1['cost'] > dag2['cost']: - # There was a mismatch where block is optimal in log 1 but it has a higher cost than the non-optimal block in log 2 - misBlk1Opt += 1 - mismatches[dagName] = {} - mismatches[dagName]['length'] = dag1['length'] - mismatches[dagName]['misSize'] = dag1['cost'] - dag2['cost'] - #print('Mismatch for dag ' + dagName + ' (Only optimal in log 1 but has higher cost than the non-optimal block in log 2)') - - elif dag2['isOptimal']: - optimalLog2+=1 - if dag2['cost'] > dag1['cost']: - # There was a mismatch where block is optimal in log 2 but it has a higher cost than the non-optimal block in log 1 - misBlk2Opt += 1 - mismatches[dagName] = {} - mismatches[dagName]['length'] = dag1['length'] - mismatches[dagName]['misSize'] = dag2['cost'] - dag1['cost'] - #print('Mismatch for dag ' + dagName + ' (Only optimal in log 2 but has higher cost than the non-optimal block in log 1)') - -print('Optimal Block Stats') -print('-----------------------------------------------------------') -print('Blocks in log file 1: ' + str(numDagsLog1)) -print('Blocks in log file 2: ' + str(numDagsLog2)) -print('Blocks that are optimal in both files: ' + str(optimalInBoth)) -print('Blocks that are optimal 
in log 1 but not in log 2: ' + str(optimalLog1)) -print('Blocks that are optimal in log 2 but not in log 1: ' + str(optimalLog2)) -print('----------------------------------------------------------\n') - -print('Mismatch stats') -print('-----------------------------------------------------------') -print('Mismatches where blocks are optimal in both logs but have different costs: ' + str(misNonEqual)) -print('Mismatches where the block is optimal in log 1 but it has a higher cost than the non-optimal block in log 2: ' + str(misBlk1Opt)) -print('Mismatches where the block is optimal in log 2 but it has a higher cost than the non-optimal block in log 1: ' + str(misBlk2Opt)) -print('Total mismatches: ' + str(misNonEqual + misBlk1Opt + misBlk2Opt)) -print('-----------------------------------------------------------\n') - -print('The ' + str(numLarMisPrt) + ' mismatched blocks with the largest difference in cost') -print('-----------------------------------------------------------') -sortedMaxMis = sorted(mismatches.items(), key=lambda i: (mismatches[i[0]]['misSize'], i[0]), reverse=True) -i = 1 -for block in sortedMaxMis[:numLarMisPrt]: - print(str(i) + ':') - print('Block Name: ' + block[0] + '\nLength: ' + str(block[1]['length']) + '\nDifference in cost: ' + str(block[1]['misSize'])) - i += 1 -print('-----------------------------------------------------------\n') - -print('The smallest ' + str(numSmlBlkPrt) + ' mismatched blocks') -print('-----------------------------------------------------------') -sortedMisSize = sorted(mismatches.items(), key=lambda i: (mismatches[i[0]]['length'], i[0])) -i = 1 -for block in sortedMisSize[:numSmlBlkPrt]: - print(str(i) + ':') - print('Block Name: ' + block[0] + '\nLength: ' + str(block[1]['length']) + '\nDifference in cost: ' + str(block[1]['misSize'])) - i += 1 -print('-----------------------------------------------------------') +if __name__ == "__main__": + dags1 = {} + dags2 = {} + + parser = argparse.ArgumentParser() + 
parser.add_argument('first') + parser.add_argument('second') + args = analyze.parse_args(parser, 'first', 'second') + + dags1 = dags_info(args.first) + dags2 = dags_info(args.second) + + numDagsLog1 = len(dags1) + numDagsLog2 = len(dags2) + # The number of blocks that are optimal in both logs. + optimalInBoth = 0 + # The number of blocks that are only optimal in log 1. + optimalLog1 = 0 + # The number of blocks that are only optimal in log 2. + optimalLog2 = 0 + # Mismatches where blocks are optimal in both logs but have different costs. + misNonEqual = 0 + # Mismatches where block is optimal in log 1 but it has a higher cost than the non-optimal block in log 2. + misBlk1Opt = 0 + # Mismatches where block is optimal in log 2 but it has a higher cost than the non-optimal block in log 1. + misBlk2Opt = 0 + # The quantity of blocks with the largest mismatches to print. + numLarMisPrt = 10 + # The quantity of mismatched blocks with the shortest length to print. + numSmlBlkPrt = 50 + # Dictionary with the sizes of the mismatches for each mismatched block and the size of the block. 
+ mismatches = {} + + + + if numDagsLog1 != numDagsLog2: + print('Error: Different number of dags in each log file.') + + for dagName in dags1: + if dagName not in dags2: + print('Error: Could not find ' + dagName + ' in the second log file.') + continue + + dag1 = dags1[dagName] + dag2 = dags2[dagName] + if dag1['isOptimal'] and dag2['isOptimal']: + optimalInBoth+=1 + if dag1['cost'] != dag2['cost']: + # There was a mismatch where blocks are optimal in both logs but have different costs + misNonEqual += 1 + mismatches[dagName] = {} + mismatches[dagName]['length'] = dag1['length'] + mismatches[dagName]['misSize'] = abs(dag1['cost'] - dag2['cost']) + #print('Mismatch for dag ' + dagName + ' (Both optimal with non-equal cost)') + + elif dag1['isOptimal']: + optimalLog1+=1 + if dag1['cost'] > dag2['cost']: + # There was a mismatch where block is optimal in log 1 but it has a higher cost than the non-optimal block in log 2 + misBlk1Opt += 1 + mismatches[dagName] = {} + mismatches[dagName]['length'] = dag1['length'] + mismatches[dagName]['misSize'] = dag1['cost'] - dag2['cost'] + #print('Mismatch for dag ' + dagName + ' (Only optimal in log 1 but has higher cost than the non-optimal block in log 2)') + + elif dag2['isOptimal']: + optimalLog2+=1 + if dag2['cost'] > dag1['cost']: + # There was a mismatch where block is optimal in log 2 but it has a higher cost than the non-optimal block in log 1 + misBlk2Opt += 1 + mismatches[dagName] = {} + mismatches[dagName]['length'] = dag1['length'] + mismatches[dagName]['misSize'] = dag2['cost'] - dag1['cost'] + #print('Mismatch for dag ' + dagName + ' (Only optimal in log 2 but has higher cost than the non-optimal block in log 1)') + + print('Optimal Block Stats') + print('-----------------------------------------------------------') + print('Blocks in log file 1: ' + str(numDagsLog1)) + print('Blocks in log file 2: ' + str(numDagsLog2)) + print('Blocks that are optimal in both files: ' + str(optimalInBoth)) + print('Blocks that 
are optimal in log 1 but not in log 2: ' + str(optimalLog1)) + print('Blocks that are optimal in log 2 but not in log 1: ' + str(optimalLog2)) + print('----------------------------------------------------------\n') + + print('Mismatch stats') + print('-----------------------------------------------------------') + print('Mismatches where blocks are optimal in both logs but have different costs: ' + str(misNonEqual)) + print('Mismatches where the block is optimal in log 1 but it has a higher cost than the non-optimal block in log 2: ' + str(misBlk1Opt)) + print('Mismatches where the block is optimal in log 2 but it has a higher cost than the non-optimal block in log 1: ' + str(misBlk2Opt)) + print('Total mismatches: ' + str(misNonEqual + misBlk1Opt + misBlk2Opt)) + print('-----------------------------------------------------------\n') + + print('The ' + str(numLarMisPrt) + ' mismatched blocks with the largest difference in cost') + print('-----------------------------------------------------------') + sortedMaxMis = sorted(mismatches.items(), key=lambda i: (mismatches[i[0]]['misSize'], i[0]), reverse=True) + i = 1 + for block in sortedMaxMis[:numLarMisPrt]: + print(str(i) + ':') + print('Block Name: ' + block[0] + '\nLength: ' + str(block[1]['length']) + '\nDifference in cost: ' + str(block[1]['misSize'])) + i += 1 + print('-----------------------------------------------------------\n') + + print('The smallest ' + str(numSmlBlkPrt) + ' mismatched blocks') + print('-----------------------------------------------------------') + sortedMisSize = sorted(mismatches.items(), key=lambda i: (mismatches[i[0]]['length'], i[0])) + i = 1 + for block in sortedMisSize[:numSmlBlkPrt]: + print(str(i) + ':') + print('Block Name: ' + block[0] + '\nLength: ' + str(block[1]['length']) + '\nDifference in cost: ' + str(block[1]['misSize'])) + i += 1 + print('-----------------------------------------------------------') From 84a07e60e3479706293fa53583b789a055e5da10 Mon Sep 17 00:00:00 
2001 From: Justin Bassett Date: Tue, 15 Jun 2021 21:40:42 -0700 Subject: [PATCH 12/45] Also calculate the number of nodes examined Only a good metric for blocks which are fully enumerated both with and without graph transformations. --- util/gt_analysis/gt_cmp.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 9eba5d94..37030f87 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -75,16 +75,18 @@ def compute_stats(nogt: Logs, gt: Logs): result = { 'Total Blocks in Benchsuite': TOTAL_BLOCKS, 'Num Blocks with GT applied': utils.count(nogt), - 'Block Cost (No GT)': utils.sum_stat_for_all(block_stats.block_cost, nogt), - 'Block Cost (GT)': utils.sum_stat_for_all(block_stats.block_cost, gt), - 'Block Cost - Relative (No GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, nogt), - 'Block Cost - Relative (GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, gt), + 'Num Nodes Examined (opt. blocks only) (No GT)': utils.sum_stat_for_all(block_stats.nodes_examined_for_blk, nogt), + 'Num Nodes Examined (opt. 
blocks only) (GT)': utils.sum_stat_for_all(block_stats.nodes_examined_for_blk, gt), + 'Total Sched Time (No GT)': sched_time(nogt), 'Total Sched Time (GT)': sched_time(gt), 'Enum Time (No GT)': utils.sum_stat_for_all(enum_time_for_blk, nogt), 'Enum Time (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt), - 'Total GT Time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), + 'Block Cost - Relative (No GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, nogt), + 'Block Cost - Relative (GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, gt), + 'Block Cost (No GT)': utils.sum_stat_for_all(block_stats.block_cost, nogt), + 'Block Cost (GT)': utils.sum_stat_for_all(block_stats.block_cost, gt), 'Num Timeout Unimproved (No GT)': utils.count(blk for blk in nogt if block_stats.is_timed_out(blk) From 331b4a98074a3a3d8d2ea0cddeb958af5903048a Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 17 Jun 2021 17:14:40 -0700 Subject: [PATCH 13/45] Change metrics again --- util/gt_analysis/gt_cmp.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 37030f87..f8c30c5c 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -7,6 +7,7 @@ from analyze.lib import block_stats, compile_times sched_time = compile_times.sched_time +total_compile_time_seconds = compile_times.total_compile_time_seconds def blocks_enumerated_optimally(blocks): @@ -54,7 +55,7 @@ def elapsed_before_enumeration_for_blk(blk: Block) -> int: def enum_time_for_blk(blk: Block) -> int: if 'DagSolvedOptimally' not in blk: - return 0 + return blk.single('DagTimedOut')['time'] - blk['Enumerating'][0]['time'] return blk.single('DagSolvedOptimally')['time'] - blk['Enumerating'][0]['time'] @@ -69,25 +70,41 @@ def is_improved(before: Block, after: Block): def compute_stats(nogt: Logs, gt: Logs): TOTAL_BLOCKS = utils.count(nogt) + nogt_all, gt_all = nogt, gt + + 
NUM_PROVED_OPTIMAL_WITHOUT_ENUMERATING = utils.count(utils.zipped_keep_blocks_if( + nogt, gt, pred=lambda a, b: block_stats.is_enumerated(a) and not block_stats.is_enumerated(b))[0]) nogt, gt = utils.zipped_keep_blocks_if( nogt, gt, pred=block_stats.is_enumerated) + nogt_opt, gt_opt = utils.zipped_keep_blocks_if(nogt, gt, pred=lambda b: 'DagSolvedOptimally' in b) + result = { 'Total Blocks in Benchsuite': TOTAL_BLOCKS, - 'Num Blocks with GT applied': utils.count(nogt), - 'Num Nodes Examined (opt. blocks only) (No GT)': utils.sum_stat_for_all(block_stats.nodes_examined_for_blk, nogt), - 'Num Nodes Examined (opt. blocks only) (GT)': utils.sum_stat_for_all(block_stats.nodes_examined_for_blk, gt), + 'Num Blocks enumerated with & without GT': utils.count(nogt), + 'Num Blocks proved optimal just by GT': NUM_PROVED_OPTIMAL_WITHOUT_ENUMERATING, + 'Total Compile Time (s) (all benchsuite) (No GT)': total_compile_time_seconds(nogt_all), + 'Total Compile Time (s) (all benchsuite) (GT)': total_compile_time_seconds(gt_all), 'Total Sched Time (No GT)': sched_time(nogt), 'Total Sched Time (GT)': sched_time(gt), 'Enum Time (No GT)': utils.sum_stat_for_all(enum_time_for_blk, nogt), 'Enum Time (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt), 'Total GT Time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), + + 'Total Sched Time (opt. blks only) (No GT)': sched_time(nogt_opt), + 'Total Sched Time (opt. blks only) (GT)': sched_time(gt_opt), + 'Enum Time (opt. blocks only) (No GT)': utils.sum_stat_for_all(enum_time_for_blk, nogt_opt), + 'Enum Time (opt. 
blocks only) (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt_opt), + 'Block Cost - Relative (No GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, nogt), 'Block Cost - Relative (GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, gt), 'Block Cost (No GT)': utils.sum_stat_for_all(block_stats.block_cost, nogt), 'Block Cost (GT)': utils.sum_stat_for_all(block_stats.block_cost, gt), + 'Num Nodes Examined (opt. blocks only) (No GT)': utils.sum_stat_for_all(block_stats.nodes_examined_for_blk, nogt_opt), + 'Num Nodes Examined (opt. blocks only) (GT)': utils.sum_stat_for_all(block_stats.nodes_examined_for_blk, gt_opt), + 'Num Timeout Unimproved (No GT)': utils.count(blk for blk in nogt if block_stats.is_timed_out(blk) and not block_stats.is_improved(blk)), From 6c17e81a5e6b67e679b37d8295e22f38ddce9720 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 17 Jun 2021 18:18:57 -0700 Subject: [PATCH 14/45] Fix zipping for duplicate blocks --- util/analyze/utils.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/util/analyze/utils.py b/util/analyze/utils.py index 31576f79..b43ac668 100644 --- a/util/analyze/utils.py +++ b/util/analyze/utils.py @@ -63,16 +63,34 @@ def zipped_keep_blocks_if(*logs, pred): Also supports pred(b), in which case it's all(pred(b) for b in (a.blk1, b.blk1, ...)) ''' + for group in zip(*logs): + assert len(set(g.uniqueid() for g in group)) == 1, group[0].raw_log + try: - all_p = set(blks[0].uniqueid() for blks in zip(*logs) if pred(*blks)) + blks = next(zip(*logs)) + pred(*blks) except TypeError: - all_p = set(blks[0].uniqueid() - for blks in zip(*logs) if all(pred(b) for b in blks)) - - filtered = tuple(log.keep_blocks_if( - lambda blk: blk.uniqueid() in all_p) for log in logs) - - return filtered + old_pred = pred + pred = lambda *blks: all(old_pred(b) for b in blks) + + def zip_benchmarks_if(*benchmarks): + # (A[a], A[a]) -> [(a, a)] + return [blks for blks in 
zip(*benchmarks) if pred(*blks)] + + # L1: [A, B, C] + # L2: [A, B, C] + # benchs: [(A, A), (B, B), (C, C)] + benchs = zip(*[l.benchmarks for l in logs]) + + # Each item: (A[a], A[a]) -> [(a, a)] inside the zip. + # zip(*[(a, a)]) -> ([a], [a]) + # zip(bench, ...): (A, A) zip ([a], [a]) -> [(A, [a]), (A, [a])] + filtered_benchs = [zip(bench, zip(*zip_benchmarks_if(*bench))) for bench in benchs] + # [ {(A, [a]), (A, [a])} ] -> [ (A[a], A[a]) ] + filtered_bench2 = [tuple(Benchmark(b.info, blks) for (b, blks) in benchs) + for benchs in filtered_benchs] + + return tuple(map(Logs, zip(*filtered_bench2))) def sum_stat_for_all(stat, logs: Logs) -> int: From cebc5472e3ba185e987a2239ebd383818e32c429 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Mon, 21 Jun 2021 20:37:51 -0700 Subject: [PATCH 15/45] Update zipped_keep_blocks_if to support empty case --- util/analyze/utils.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/util/analyze/utils.py b/util/analyze/utils.py index b43ac668..a036cf45 100644 --- a/util/analyze/utils.py +++ b/util/analyze/utils.py @@ -62,6 +62,8 @@ def zipped_keep_blocks_if(*logs, pred): Also supports pred(b), in which case it's all(pred(b) for b in (a.blk1, b.blk1, ...)) ''' + if not logs: + return [] for group in zip(*logs): assert len(set(g.uniqueid() for g in group)) == 1, group[0].raw_log @@ -74,23 +76,28 @@ def zipped_keep_blocks_if(*logs, pred): pred = lambda *blks: all(old_pred(b) for b in blks) def zip_benchmarks_if(*benchmarks): - # (A[a], A[a]) -> [(a, a)] - return [blks for blks in zip(*benchmarks) if pred(*blks)] - - # L1: [A, B, C] - # L2: [A, B, C] - # benchs: [(A, A), (B, B), (C, C)] + # (A[a], A[a]) -> [(a, a)] or [] + zipped = [blks for blks in zip(*benchmarks) if pred(*blks)] + unzipped = list(zip(*zipped)) # [(a, a)] -> ([a], [a]); [] -> [] + if not unzipped: # if []: ([], []) + unzipped = [()] * len(benchmarks) + # ([a], [a]) -> (A[a], A[a]) + return 
[Benchmark(bench.info, bench_blks) for bench, bench_blks in zip(benchmarks, unzipped)] + + # [(Bench.X, Bench.X), (Bench.Y, Bench.Y)] + result = [] + + # L1: [A[a], B[b], C[c]] + # L2: [A[a], B[b], C[c]] + # benchs: [(A[a], A[a]), (B[b], B[b]), ...] benchs = zip(*[l.benchmarks for l in logs]) + # filtered_benchs: [(A[a], A[a]), (B[b], B[b]), ...] + filtered_benchs = (zip_benchmarks_if(*bench_grp) for bench_grp in benchs) + # [(A[a], A[a]), (B[b], B[b])] -> ([A[a], B[b], ...], [A[a], B[b], ...]) + log_benchs = zip(*filtered_benchs) + new_logs = map(Logs, log_benchs) - # Each item: (A[a], A[a]) -> [(a, a)] inside the zip. - # zip(*[(a, a)]) -> ([a], [a]) - # zip(bench, ...): (A, A) zip ([a], [a]) -> [(A, [a]), (A, [a])] - filtered_benchs = [zip(bench, zip(*zip_benchmarks_if(*bench))) for bench in benchs] - # [ {(A, [a]), (A, [a])} ] -> [ (A[a], A[a]) ] - filtered_bench2 = [tuple(Benchmark(b.info, blks) for (b, blks) in benchs) - for benchs in filtered_benchs] - - return tuple(map(Logs, zip(*filtered_bench2))) + return tuple(new_logs) def sum_stat_for_all(stat, logs: Logs) -> int: From d8d99d115e44877a625ef059c2afa9d1daa0de0c Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 22 Jun 2021 10:32:12 -0700 Subject: [PATCH 16/45] Small improvement to compile time calculation --- util/analyze/_types.py | 1 + util/analyze/lib/compile_times.py | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/util/analyze/_types.py b/util/analyze/_types.py index 01920d13..22696f78 100644 --- a/util/analyze/_types.py +++ b/util/analyze/_types.py @@ -119,6 +119,7 @@ class Block: def __init__(self, info, raw_log, events): self.name = info['name'] + self.benchmark = info['benchmark'] self.info = info self.raw_log = raw_log self.events = events diff --git a/util/analyze/lib/compile_times.py b/util/analyze/lib/compile_times.py index b2c20d2c..ed322820 100755 --- a/util/analyze/lib/compile_times.py +++ b/util/analyze/lib/compile_times.py @@ -4,6 +4,7 @@ 
import re import argparse import sys +import logging import analyze from analyze import Block, foreach_bench @@ -19,13 +20,26 @@ def sched_time(logs): return sum(_block_time(blk) for blk in logs) +_CPU2017_TIME_ELAPSED = re.compile(r"Elapsed compile for '(?P<bench>[^']+)': \S+ \((?P<elapsed>\d+)\)") +_BACKUP_TIME_ELAPSED = re.compile(r'(?P<elapsed>\d+) total seconds elapsed') + + def total_compile_time_seconds(logs): - last_logs = logs.benchmarks[-1].blocks[-1].raw_log - m = re.search(r'(\d+) total seconds elapsed', last_logs) + last_blk = logs.benchmarks[-1].blocks[-1] + last_logs = last_blk.raw_log + m = [g for g in _CPU2017_TIME_ELAPSED.finditer(last_logs) + if last_blk.benchmark == g['bench']] + + if m: + if len(m) != 1: + logging.warning('Multiple CPU2017 elapsed time indicators. Using the first one out of: %s', m) + return m[0]['elapsed'] + + m = _BACKUP_TIME_ELAPSED.search(last_logs) assert m, \ 'Logs must contain "total seconds elapsed" output by the SPEC benchmark suite' - return m.group(1) + return m['elapsed'] if __name__ == '__main__': From 81fe20adb2276329df10924e5150d260f01d5438 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 22 Jun 2021 11:08:58 -0700 Subject: [PATCH 17/45] Support pass filtering when recording compile time --- util/analyze/utils.py | 6 +++--- util/gt_analysis/gt_cmp.py | 9 +++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/util/analyze/utils.py b/util/analyze/utils.py index a036cf45..bbc5677b 100644 --- a/util/analyze/utils.py +++ b/util/analyze/utils.py @@ -11,7 +11,7 @@ def sum_dicts(ds): return {k: sum(d[k] for d in ds) for k in ds[0].keys()} -def foreach_bench(analysis_f, *logs, combine=None): +def foreach_bench(analysis_f, *logs, combine=None, **kwargs): ''' Repeats `analysis_f` for each benchmark in `logs`. Also computes the analysis for the entire thing.
@@ -25,11 +25,11 @@ def foreach_bench(analysis_f, *logs, combine=None): ''' if combine is None: - combine = lambda *args: analysis_f(*logs) + combine = lambda *args: analysis_f(*logs, **kwargs) benchmarks = zip(*[log.benchmarks for log in logs]) - bench_stats = {bench[0].name: analysis_f(*bench) for bench in benchmarks} + bench_stats = {bench[0].name: analysis_f(*bench, **kwargs) for bench in benchmarks} total = combine(bench_stats.values()) return { diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index f8c30c5c..371d15f6 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -67,11 +67,15 @@ def is_improved(before: Block, after: Block): return cost_for_blk(before) > cost_for_blk(after) -def compute_stats(nogt: Logs, gt: Logs): +def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int): TOTAL_BLOCKS = utils.count(nogt) nogt_all, gt_all = nogt, gt + if pass_num is not None: + nogt = nogt.keep_blocks_if(lambda b: b.single('PassFinished')['num'] == pass_num) + gt = gt.keep_blocks_if(lambda b: b.single('PassFinished')['num'] == pass_num) + NUM_PROVED_OPTIMAL_WITHOUT_ENUMERATING = utils.count(utils.zipped_keep_blocks_if( nogt, gt, pred=lambda a, b: block_stats.is_enumerated(a) and not block_stats.is_enumerated(b))[0]) nogt, gt = utils.zipped_keep_blocks_if( @@ -129,9 +133,10 @@ def compute_stats(nogt: Logs, gt: Logs): parser = argparse.ArgumentParser() parser.add_argument('nogt') parser.add_argument('gt') + parser.add_argument('--pass-num', type=int, default=None, help='Which pass to analyze (default: all passes)') args = analyze.parse_args(parser, 'nogt', 'gt') - results = utils.foreach_bench(compute_stats, args.nogt, args.gt) + results = utils.foreach_bench(compute_stats, args.nogt, args.gt, pass_num=args.pass_num) writer = csv.DictWriter(sys.stdout, fieldnames=['Benchmark'] + list(results['Total'].keys())) From 6dd293c52c3ba720cd8462a348f553a7e465b173 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 22 Jun 2021 
15:55:39 -0700 Subject: [PATCH 18/45] Adjust relative cost calculation As GT improve our LB calculation, the relative cost of no GT should be relative to the with-GT lower bound. Otherwise, the "Block Cost - Relative" metric is including the LB improvement, which is misleading. --- util/gt_analysis/gt_cmp.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 371d15f6..27d98cd9 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import argparse +from typing import Tuple import analyze from analyze import Block, Logs, utils @@ -67,6 +68,25 @@ def is_improved(before: Block, after: Block): return cost_for_blk(before) > cost_for_blk(after) +def blk_relative_cost(nogt, gt) -> Tuple[int, int]: + no_sum = yes_sum = 0 + for no, yes in zip(nogt, gt): + no_cost = block_stats.block_cost(no) + yes_cost = block_stats.block_cost(yes) + no_lb = block_stats.block_cost_lower_bound(no) + yes_lb = block_stats.block_cost_lower_bound(yes) + assert no_lb <= yes_lb + + # relative to the tightest LB we know + no_rel = no_cost - yes_lb + yes_rel = yes_cost - yes_lb + + no_sum += no_rel + yes_sum += yes_rel + + return no_sum, yes_sum + + def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int): TOTAL_BLOCKS = utils.count(nogt) @@ -83,6 +103,8 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int): nogt_opt, gt_opt = utils.zipped_keep_blocks_if(nogt, gt, pred=lambda b: 'DagSolvedOptimally' in b) + nogt_rel, gt_rel = blk_relative_cost(nogt, gt) + result = { 'Total Blocks in Benchsuite': TOTAL_BLOCKS, 'Num Blocks enumerated with & without GT': utils.count(nogt), @@ -101,8 +123,8 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int): 'Enum Time (opt. blocks only) (No GT)': utils.sum_stat_for_all(enum_time_for_blk, nogt_opt), 'Enum Time (opt. 
blocks only) (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt_opt), - 'Block Cost - Relative (No GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, nogt), - 'Block Cost - Relative (GT)': utils.sum_stat_for_all(block_stats.block_relative_cost, gt), + 'Block Cost - Relative (No GT)': nogt_rel, + 'Block Cost - Relative (GT)': gt_rel, 'Block Cost (No GT)': utils.sum_stat_for_all(block_stats.block_cost, nogt), 'Block Cost (GT)': utils.sum_stat_for_all(block_stats.block_cost, gt), From 8d305a87f3ae468e32f6d6cba8e6b550e7aab965 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Wed, 23 Jun 2021 11:40:27 -0700 Subject: [PATCH 19/45] Add plaidml total compile time analysis --- util/analyze/lib/compile_times.py | 28 ++++++++++++++++++++++++---- util/gt_analysis/gt_cmp.py | 13 +++++++------ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/util/analyze/lib/compile_times.py b/util/analyze/lib/compile_times.py index ed322820..57e404b1 100755 --- a/util/analyze/lib/compile_times.py +++ b/util/analyze/lib/compile_times.py @@ -22,6 +22,15 @@ def sched_time(logs): _CPU2017_TIME_ELAPSED = re.compile(r"Elapsed compile for '(?P<bench>[^']+)': \S+ \((?P<elapsed>\d+)\)") _BACKUP_TIME_ELAPSED = re.compile(r'(?P<elapsed>\d+) total seconds elapsed') +_PLAIDML_TIME_ELAPSED = re.compile( + r'Example finished, elapsed: (?P<elapsed>\S+)s \(compile\), (?P<exec>\S+)s \(execution\)') + + +def plaidml_total_compile_time_seconds(logs): + try: + return sum(float(_PLAIDML_TIME_ELAPSED.search(bench.blocks[-1].raw_log)['elapsed']) for bench in logs.benchmarks) + except TypeError: + raise KeyError('Logs must contain "Example finished, elapsed:" output by the PlaidML benchmark suite') def total_compile_time_seconds(logs): @@ -33,23 +42,34 @@ def total_compile_time_seconds(logs): if m: if len(m) != 1: logging.warning('Multiple CPU2017 elapsed time indicators.
Using the first one out of: %s', m) - return m[0]['elapsed'] + return int(m[0]['elapsed']) m = _BACKUP_TIME_ELAPSED.search(last_logs) assert m, \ 'Logs must contain "total seconds elapsed" output by the SPEC benchmark suite' - return m['elapsed'] + return int(m['elapsed']) + + +def total_compile_time_seconds_f(benchsuite): + return { + 'spec': total_compile_time_seconds, + 'plaidml': plaidml_total_compile_time_seconds + }[benchsuite] if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--variant', choices=('sched', 'total'), + parser.add_argument('--variant', choices=('sched', 'total', 'plaidml'), help='Which timing variant to use') parser.add_argument('logs', help='The logs to analyze') args = analyze.parse_args(parser, 'logs') - fn = total_compile_time_seconds if args.variant == 'total' else sched_time + fn = { + 'sched': sched_time, + 'total': total_compile_time_seconds, + 'plaidml': plaidml_total_compile_time_seconds, + }[args.variant] results = foreach_bench(fn, args.logs, combine=sum) writer = csv.DictWriter(sys.stdout, fieldnames=results.keys()) writer.writeheader() diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 27d98cd9..02ec5d95 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -8,7 +8,6 @@ from analyze.lib import block_stats, compile_times sched_time = compile_times.sched_time -total_compile_time_seconds = compile_times.total_compile_time_seconds def blocks_enumerated_optimally(blocks): @@ -87,9 +86,7 @@ def blk_relative_cost(nogt, gt) -> Tuple[int, int]: return no_sum, yes_sum -def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int): - TOTAL_BLOCKS = utils.count(nogt) - +def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int, total_compile_time_seconds): nogt_all, gt_all = nogt, gt if pass_num is not None: @@ -106,7 +103,7 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int): nogt_rel, gt_rel = blk_relative_cost(nogt, gt) result = { - 'Total Blocks in 
Benchsuite': TOTAL_BLOCKS, + 'Total Blocks in Benchsuite': utils.count(nogt_all), 'Num Blocks enumerated with & without GT': utils.count(nogt), 'Num Blocks proved optimal just by GT': NUM_PROVED_OPTIMAL_WITHOUT_ENUMERATING, @@ -158,7 +155,11 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int): parser.add_argument('--pass-num', type=int, default=None, help='Which pass to analyze (default: all passes)') args = analyze.parse_args(parser, 'nogt', 'gt') - results = utils.foreach_bench(compute_stats, args.nogt, args.gt, pass_num=args.pass_num) + results = utils.foreach_bench( + compute_stats, args.nogt, args.gt, + pass_num=args.pass_num, + total_compile_time_seconds=compile_times.total_compile_time_seconds_f(args.benchsuite), + ) writer = csv.DictWriter(sys.stdout, fieldnames=['Benchmark'] + list(results['Total'].keys())) From 519991125e32f78823840aae6a63928a256f1942 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 24 Jun 2021 11:53:17 -0700 Subject: [PATCH 20/45] Properly zip cases where zip(*logs) is empty --- util/analyze/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/util/analyze/utils.py b/util/analyze/utils.py index bbc5677b..39f68a80 100644 --- a/util/analyze/utils.py +++ b/util/analyze/utils.py @@ -74,6 +74,15 @@ def zipped_keep_blocks_if(*logs, pred): except TypeError: old_pred = pred pred = lambda *blks: all(old_pred(b) for b in blks) + except StopIteration: + # There was nothing in zip(*logs)... 
# Matches the per-compile total, in nanoseconds, printed by the modified SHOC
# suite. The group must be named `elapsed`: the function below reads
# m['elapsed'], and a nameless `(?P...)` is not a valid pattern.
_SHOC_TIME_ELAPSED = re.compile(r'Finished compiling; total ns = (?P<elapsed>\d+)')


def shoc_total_compile_time_seconds(logs):
    """Return the total SHOC compile time in seconds.

    Every "Finished compiling; total ns = N" occurrence in the raw log of
    every block of every benchmark in ``logs.benchmarks`` is summed, and the
    nanosecond total is scaled by 1e-9.

    Note: ``finditer`` simply yields nothing when the marker is absent, so
    logs without the marker produce ``0.0`` rather than an error.

    Raises:
        KeyError: if a ``TypeError`` occurs while scanning (e.g. a block whose
            ``raw_log`` is not a string); the original error is chained.
    """
    try:
        elapsed_ns = sum(int(m['elapsed'])
                         for bench in logs.benchmarks
                         for blk in bench
                         for m in _SHOC_TIME_ELAPSED.finditer(blk.raw_log))
    except TypeError as err:
        raise KeyError(
            'Logs must contain "Finished compiling; total ns = " output by the modified SHOC benchmark suite'
        ) from err
    return float(elapsed_ns) * 1e-9
writer = csv.DictWriter(sys.stdout, fieldnames=results.keys()) From b5392cb215ecf36f874a20b4b971de4702ecea2e Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Mon, 5 Jul 2021 11:37:47 -0700 Subject: [PATCH 22/45] Fix validation test Applies these fixes: - Teaches validation test about blocks across different passes. Now, if a block was not optimal on a prior pass, the script knows it is not optimal in later passes. - Teaches validation test to handle dags with the same name. Many such examples exist in the benchmark suites that we use. Additionally, a large number of commandline options have been added. --- util/misc/validation-test.py | 450 +++++++++++++++++++++++++---------- 1 file changed, 327 insertions(+), 123 deletions(-) diff --git a/util/misc/validation-test.py b/util/misc/validation-test.py index 2373c104..c50d8858 100755 --- a/util/misc/validation-test.py +++ b/util/misc/validation-test.py @@ -1,23 +1,108 @@ -#/usr/bin/python3 -# TODO -# 1: Add options praser. -# 2: Make printing all mismatched dags optional and disabled by default. -# 3: Add option to print out x number of blocks with largest mismatches. -# 4: Add option to print out x number of mismatches with smallest number of instructions. 
def split_adjacent(iterable, adj_eq=None):
    '''
    Splits the iterable into regions of "equal values" as specified by adj_eq.

    adj_eq(prev, cur) is called with each pair of neighbouring elements; a new
    region starts whenever it returns a falsy value. Each region is yielded as
    a tuple. An empty iterable yields no regions.

    Examples:
        split_adjacent([1, 1, 1, 2, 2, 2, 2, 2, 3, 5])  # -> [(1, 1, 1), (2, 2, 2, 2, 2), (3,), (5,)]
        split_adjacent([1, 2, 3, 4, 1, 2, 3, 4, 2, 3, 8, 9], lambda x, y: x <= y)
            # -> [(1, 2, 3, 4), (1, 2, 3, 4), (2, 3, 8, 9)]
        split_adjacent([])  # -> []
    '''
    if adj_eq is None:
        # Default: use ==
        def adj_eq(x, y): return x == y

    values = []
    for x in iterable:
        if values and not adj_eq(values[-1], x):
            yield tuple(values)
            # Start a fresh list; the tuple already yielded is independent.
            values = []
        values.append(x)

    # Only emit the trailing region if there is one. The previous `assert`
    # raised on empty input and, with assertions disabled (-O), yielded a
    # bogus empty tuple instead.
    if values:
        yield tuple(values)
def validate_dags(dags1: Dict[str, List[List[DagInfo]]], dags2: Dict[str, List[List[DagInfo]]]) -> ValidationInfo:
    """Compare the regions of two logs and collect optimality/mismatch stats.

    Args:
        dags1: Region name -> groups of DagInfo for the first log, as built by
            extract_dag_info (each group holds the successive passes of one
            scheduling problem).
        dags2: Same structure for the second log.

    Returns:
        A ValidationInfo with the region counts, the optimal-block counters
        and every Mismatch found.

    Regions are paired by *name*. A region of the first log that is absent
    from the second is reported on stderr and skipped; pairing by dict
    position would silently compare unrelated regions from that point on.

    Side effect: a block's ``is_optimal`` flag is cleared in place when the
    same problem was not optimal in the preceding pass.
    """
    result = ValidationInfo(num_regions_first=len(dags1), num_regions_second=len(dags2))

    for name, grouped_blocks_f in dags1.items():
        grouped_blocks_s = dags2.get(name)
        if grouped_blocks_s is None:
            print(f'WARNING: Could not find {name} in the second log file.', file=sys.stderr)
            continue

        for blocks_f, blocks_s in zip(grouped_blocks_f, grouped_blocks_s):
            # blocks_* is the groups of blocks referring to the same problem, with different pass nums.
            prev_f = prev_s = None

            for block_f, block_s in zip(blocks_f, blocks_s):
                # A problem that was not solved optimally in an earlier pass
                # cannot be considered optimal in a later one.
                if prev_f is not None and not prev_f.is_optimal:
                    block_f.is_optimal = False
                if prev_s is not None and not prev_s.is_optimal:
                    block_s.is_optimal = False

                classify_optimal(result, block_f, block_s)
                mismatch = parse_mismatch(block_f, block_s)
                if mismatch is not None:
                    classify_mismatch(result, mismatch)
                    result.mismatches.append(mismatch)

                prev_f, prev_s = block_f, block_s

    return result
NUM_SMALLEST_BLOCKS_PRINT = 50


def main(first, second,
         quiet: bool = False,
         summarize_biggest_cost_difference: bool = True,
         summarize_smallest_regions: bool = True):
    """Validate two parsed logs against each other and print a report.

    Args:
        first: Parsed logs of the first run.
        second: Parsed logs of the second run.
        quiet: Suppress the optimal-block stats, and print the mismatch stats
            only when at least one mismatch exists.
        summarize_biggest_cost_difference: Print the mismatches with the
            largest absolute cost difference (NUM_LARGEST_MISMATCHES_PRINT).
        summarize_smallest_regions: Print the mismatches with the fewest
            instructions (NUM_SMALLEST_BLOCKS_PRINT).

    Exits the process with a non-zero status (message on stderr) when any
    mismatch was found.
    """
    dags1 = extract_dag_info(first)
    dags2 = extract_dag_info(second)
    info: ValidationInfo = validate_dags(dags1, dags2)

    @enable_if(not quiet)
    def print_stats_info(info: ValidationInfo):
        print('Optimal Block Stats')
        print('-----------------------------------------------------------')
        print('Blocks in log file 1: ' + str(info.num_regions_first))
        print('Blocks in log file 2: ' + str(info.num_regions_second))
        print('Blocks that are optimal in both files: ' + str(info.num_optimal_both))
        print('Blocks that are optimal in log 1 but not in log 2: ' + str(info.num_optimal_first))
        print('Blocks that are optimal in log 2 but not in log 1: ' + str(info.num_optimal_second))
        print('----------------------------------------------------------\n')

    @enable_if(info.mismatches or not quiet)
    def print_mismatch_info(info: ValidationInfo):
        print('Mismatch stats')
        print('-----------------------------------------------------------')
        print('Mismatches where blocks are optimal in both logs but have different costs: ' +
              str(info.num_mismatch[MismatchKind.BOTH_OPTIMAL_BUT_UNEQUAL]))
        print('Mismatches where the block is optimal in log 1 but it has a higher cost than the non-optimal block in log 2: ' +
              str(info.num_mismatch[MismatchKind.FIRST_OPTIMAL_BUT_WORSE]))
        print('Mismatches where the block is optimal in log 2 but it has a higher cost than the non-optimal block in log 1: ' +
              str(info.num_mismatch[MismatchKind.SECOND_OPTIMAL_BUT_WORSE]))
        print('Total mismatches: ' + str(len(info.mismatches)))
        print('-----------------------------------------------------------\n')

    def print_block_info(index: int, mismatch: Mismatch):
        cost_diff = mismatch.costs[0] - mismatch.costs[1]
        # A first-log cost of 0 is possible (cost = lower bound + relative
        # cost); avoid dividing by it.
        if mismatch.costs[0]:
            percent_diff = f'{(cost_diff / mismatch.costs[0])*100:0.2f} %'
        else:
            percent_diff = 'N/A'
        print(dedent(f'''\
            {index}:
            Block Name: {mismatch.dag_id}
            Num Instructions: {mismatch.region_size}
            Length: {mismatch.lengths[0]} --> {mismatch.lengths[1]}
            Difference in cost: {cost_diff}
            Percent cost difference: {percent_diff}
            '''
                     ))

    @enable_if(summarize_biggest_cost_difference)
    def print_big_diff_summary(mismatches: List[Mismatch]):
        if NUM_LARGEST_MISMATCHES_PRINT == 0:
            print('Requested 0 mismatched blocks with the largest difference in cost')
            return

        print('The ' + str(NUM_LARGEST_MISMATCHES_PRINT) + ' mismatched blocks with the largest difference in cost')
        print('-----------------------------------------------------------')
        sortedMaxMis = sorted(mismatches, key=lambda m: abs(m.costs[1] - m.costs[0]), reverse=True)
        for index, mismatch in enumerate(sortedMaxMis[:NUM_LARGEST_MISMATCHES_PRINT]):
            print_block_info(index, mismatch)
        print('-----------------------------------------------------------\n')

    @enable_if(summarize_smallest_regions)
    def print_small_summary(mismatches: List[Mismatch]):
        if NUM_SMALLEST_BLOCKS_PRINT == 0:
            print('Requested 0 mismatched blocks with the smallest block size')
            return

        print('The smallest ' + str(NUM_SMALLEST_BLOCKS_PRINT) + ' mismatched blocks')
        print('-----------------------------------------------------------')
        sortedMisSize = sorted(mismatches, key=lambda m: m.region_size)
        # Limit by the smallest-blocks count announced in the header above,
        # not by the largest-mismatch count.
        for index, mismatch in enumerate(sortedMisSize[:NUM_SMALLEST_BLOCKS_PRINT]):
            print_block_info(index, mismatch)
        print('-----------------------------------------------------------\n')

    print_mismatches(
        info,
        print_stats_info=print_stats_info,
        print_mismatch_info=print_mismatch_info,
        print_mismatch_summaries=[print_big_diff_summary, print_small_summary]
    )
    if info.mismatches:
        # sys.exit with a string prints it to stderr and exits with status 1;
        # the bare `exit` helper is only installed by the `site` module.
        sys.exit(f'{len(info.mismatches)} mismatches found')


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('first')
    parser.add_argument('second')

    parser.add_argument('-q', '--quiet', action='store_true',
                        help='Only print mismatch info, and only if there are mismatches')
    # These are opt-out flags: absent -> False -> summary enabled (see the
    # `not args.no_...` below). With store_false the default was True, which
    # disabled both summaries unless the "no-" flag was passed.
    parser.add_argument('--no-summarize-largest-cost-difference', action='store_true',
                        help='Do not summarize the mismatches with the biggest difference in cost')
    parser.add_argument('--no-summarize-smallest-mismatches', action='store_true',
                        help='Do not summarize the mismatches with the smallest region size')

    parser.add_argument('--num-largest-cost-mismatches-print', type=int, default=10,
                        help='The number of mismatches blocks with the largest (by cost) mismatches to print')
    # NOTE(review): this default (10) differs from the module-level
    # NUM_SMALLEST_BLOCKS_PRINT (50) -- confirm which one is intended.
    parser.add_argument('--num-smallest-mismatches-print', type=int, default=10,
                        help='The number of mismatched blocks with the shortest length to print')

    parser.add_argument('--missing-lb-dump-count', type=int, default=3,
                        help='The number of blocks with missing lower bounds to display')
    parser.add_argument('--missing-lb-dump-lines', type=int, default=10,
                        help='The number of lines of a block with missing lower bound to display')
    args = analyze.parse_args(parser, 'first', 'second')

    # Override the module-level settings read by main() and extract_dag_info().
    NUM_LARGEST_MISMATCHES_PRINT = args.num_largest_cost_mismatches_print
    NUM_SMALLEST_BLOCKS_PRINT = args.num_smallest_mismatches_print
    MISSING_LOWER_BOUND_DUMP_COUNT = args.missing_lb_dump_count
    MISSING_LOWER_BOUND_DUMP_LINES = args.missing_lb_dump_lines

    main(
        args.first, args.second,
        quiet=args.quiet,
        summarize_biggest_cost_difference=not args.no_summarize_largest_cost_difference,
        summarize_smallest_regions=not args.no_summarize_smallest_mismatches,
    )
import argparse
import csv


def _resolve_fields(data, fieldnames):
    """Work out (add_bench, fieldnames) for a writer.

    Without explicit fieldnames the columns are 'Benchmark' followed by the
    keys of data['Total'], and the benchmark name is always injected. With
    explicit fieldnames it is injected only when 'Benchmark' is one of them
    and is not itself a key of data.
    """
    if fieldnames is None:
        return True, ['Benchmark', *data['Total'].keys()]
    inject = 'Benchmark' in fieldnames and 'Benchmark' not in data
    return inject, fieldnames


class _Writer:
    """Base class: optionally tags each row with its benchmark name, then
    hands it to the subclass hooks `_benchdata` / `_finish`."""

    def __init__(self, add_bench):
        self._add_bench = add_bench

    def benchdata(self, bench, data):
        row = {'Benchmark': bench, **data} if self._add_bench else data
        self._benchdata(row)

    def finish(self):
        self._finish()


class _CSVWriter(_Writer):
    """Writes one CSV row per benchmark; the header is written immediately."""

    def __init__(self, f, data: dict, fieldnames=None):
        add_bench, columns = _resolve_fields(data, fieldnames)
        super().__init__(add_bench)

        self._rows = csv.DictWriter(f, fieldnames=columns)
        self._rows.writeheader()

    def _benchdata(self, data):
        self._rows.writerow(data)

    def _finish(self):
        # Rows are written as they arrive; nothing is buffered.
        return None


class _HumanWriter(_Writer):
    """Buffers everything and writes one padded text line per field, with one
    column per benchmark, when finished."""

    def __init__(self, f, data: dict, fieldnames=None):
        add_bench, columns = _resolve_fields(data, fieldnames)
        super().__init__(add_bench)

        self._out = f
        self._fields = columns
        # Every output line starts with its "<field>:" label.
        self._cells = {}
        for name in columns:
            self._cells[name] = [f'{name}:']
        self._n_columns = 1

    def _benchdata(self, data):
        self._n_columns += 1
        for key, value in data.items():
            self._cells[key].append(str(value))

    def _finish(self):
        # Width of each column = widest cell in that column, plus one space.
        widths = []
        for col in range(self._n_columns):
            widest = max(len(self._cells[name][col]) for name in self._fields)
            widths.append(widest + 1)

        for name in self._fields:
            for col, cell in enumerate(self._cells[name]):
                self._out.write(f'{cell:{widths[col]}}')
            self._out.write('\n')


def _write_data(writer: _Writer, data: dict):
    """Feed every (benchmark, stats) pair of data to writer, then finish it."""
    for bench in data:
        writer.benchdata(bench, data[bench])
    writer.finish()


def write_csv(f, data: dict, *, fieldnames=None):
    """Write data (benchmark name -> stats dict) to f as CSV."""
    csv_writer = _CSVWriter(f, data, fieldnames)
    _write_data(csv_writer, data)


def write_human(f, data: dict, *, fieldnames=None):
    """Write data (benchmark name -> stats dict) to f as an aligned text table."""
    human_writer = _HumanWriter(f, data, fieldnames)
    _write_data(human_writer, data)


def add_output_format_arg(parser: argparse.ArgumentParser, default='csv'):
    """Add a --format option to parser and register a post-processing hook.

    The hook is stored under the parser attribute
    '__analyze_post_process_parse_args__' (created if missing) and maps the
    chosen format name to the matching write_* function.
    """
    parser.add_argument('--format', default=default, choices=('csv', 'human'),
                        help=f'Which format style to use (default: {default})')
    writers_by_name = {
        'csv': write_csv,
        'human': write_human,
    }

    hook_attr = '__analyze_post_process_parse_args__'
    if not hasattr(parser, hook_attr):
        setattr(parser, hook_attr, {})
    getattr(parser, hook_attr)['format'] = writers_by_name.__getitem__
* import argparse +from analyze import ioutils import analyze from analyze import Block, Logs, utils @@ -82,12 +83,9 @@ def compute_block_stats(logs: Logs): parser = argparse.ArgumentParser( description='Computes the block stats for the logs') parser.add_argument('logs', help='The logs to analyze') + ioutils.add_output_format_arg(parser) args = analyze.parse_args(parser, 'logs') results = utils.foreach_bench(compute_block_stats, args.logs) - writer = csv.DictWriter(sys.stdout, - fieldnames=['Benchmark'] + list(results['Total'].keys())) - writer.writeheader() - for bench, bench_res in results.items(): - writer.writerow({'Benchmark': bench, **bench_res}) + args.format(result) diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 02ec5d95..c7a5ca65 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -4,7 +4,7 @@ from typing import Tuple import analyze -from analyze import Block, Logs, utils +from analyze import Block, Logs, utils, ioutils from analyze.lib import block_stats, compile_times sched_time = compile_times.sched_time @@ -153,6 +153,7 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int, total_compile_time_sec parser.add_argument('nogt') parser.add_argument('gt') parser.add_argument('--pass-num', type=int, default=None, help='Which pass to analyze (default: all passes)') + ioutils.add_output_format_arg(parser) args = analyze.parse_args(parser, 'nogt', 'gt') results = utils.foreach_bench( @@ -161,8 +162,4 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int, total_compile_time_sec total_compile_time_seconds=compile_times.total_compile_time_seconds_f(args.benchsuite), ) - writer = csv.DictWriter(sys.stdout, - fieldnames=['Benchmark'] + list(results['Total'].keys())) - writer.writeheader() - for bench, bench_res in results.items(): - writer.writerow({'Benchmark': bench, **bench_res}) + args.format(sys.stdout, results) From f862b89f22469f13580a0da4ba047e2f613a84d7 Mon Sep 17 00:00:00 2001 From: Justin 
def heuristic_time_for_blk(blk: Block) -> int:
    """Return the 'elapsed' value of the block's single HeuristicResult event."""
    heuristic_event = blk.single('HeuristicResult')
    return heuristic_event['elapsed']


def heuristic_time(logs):
    """Return the sum of heuristic_time_for_blk over every block in logs."""
    return sum(map(heuristic_time_for_blk, logs))


def first_lower_bound_time_for_blk(blk: Block) -> int:
    """Return the 'elapsed' value of the block's first CostLowerBound event."""
    first_event = blk['CostLowerBound'][0]
    return first_event['elapsed']


def first_lower_bound_time(logs):
    """Return the sum of first_lower_bound_time_for_blk over every block in logs."""
    return sum(map(first_lower_bound_time_for_blk, logs))
'Enum Time (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt), + 'Lower Bound Time (No GT)': compile_times.first_lower_bound_time(nogt), + 'Lower Bound Time (GT)': compile_times.first_lower_bound_time(gt), + 'Heuristic Time (No GT)': compile_times.heuristic_time(nogt), + 'Heuristic Time (GT)': compile_times.heuristic_time(gt), 'Total GT Time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), 'Total Sched Time (opt. blks only) (No GT)': sched_time(nogt_opt), From fea4e583bb22f35c6417e3acec617a2510171fc1 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 15 Jul 2021 17:10:18 -0700 Subject: [PATCH 25/45] Transpose CSV output --- util/analyze/ioutils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/util/analyze/ioutils.py b/util/analyze/ioutils.py index 6de6647a..ca52b9ec 100644 --- a/util/analyze/ioutils.py +++ b/util/analyze/ioutils.py @@ -1,5 +1,6 @@ import argparse import csv +from io import StringIO class _Writer: @@ -26,14 +27,18 @@ def __init__(self, f, data: dict, fieldnames=None): if fieldnames is None: fieldnames = ['Benchmark', *data['Total'].keys()] - self.__csv_writer = csv.DictWriter(f, fieldnames=fieldnames) + self.__f = f + self.__mem_file = StringIO() + self.__csv_writer = csv.DictWriter(self.__mem_file, fieldnames=fieldnames) self.__csv_writer.writeheader() def _benchdata(self, data): self.__csv_writer.writerow(data) def _finish(self): - pass + self.__mem_file.seek(0) + transposed = zip(*csv.reader(self.__mem_file)) + csv.writer(self.__f).writerows(transposed) class _HumanWriter(_Writer): From 4aa6d7f3f2edbf9a79dd87edcb0e485fb807d9a5 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Fri, 16 Jul 2021 17:34:18 -0700 Subject: [PATCH 26/45] Add tool to combine sharded csv metrics --- util/misc/merge-csv.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100755 util/misc/merge-csv.py diff --git a/util/misc/merge-csv.py b/util/misc/merge-csv.py new file mode 100755 index 
00000000..c6e9daf1 --- /dev/null +++ b/util/misc/merge-csv.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +import argparse +import csv +import sys + + +def main(infile, outfile): + metrics = {} + metric_names = [] + + for metric, total, bench in csv.reader(infile): + assert total == bench or total == 'Total' + if metric not in metrics: + metric_names.append(metric) + + metrics.setdefault(metric, []).append(bench) + + writer = csv.writer(outfile) + for metric in metric_names: + try: + writer.writerow([metric, sum(int(x) for x in metrics[metric]), *metrics[metric]]) + except ValueError: + writer.writerow([metric, 'Total', *metrics[metric]]) + + +if __name__ == '__main__': + main(sys.stdin, sys.stdout) From 64d6798083544dbc6bda2cfa0ca3d65a970ce1ce Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Sat, 17 Jul 2021 15:17:26 -0700 Subject: [PATCH 27/45] Add tool to combine exec-time results --- util/CPU2006/combine-exec.py | 94 ++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100755 util/CPU2006/combine-exec.py diff --git a/util/CPU2006/combine-exec.py b/util/CPU2006/combine-exec.py new file mode 100755 index 00000000..a100f2a6 --- /dev/null +++ b/util/CPU2006/combine-exec.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 + +from io import StringIO +import csv +import re +import sys +import argparse +from contextlib import ExitStack +from typing import Iterable, List, Tuple +from openpyxl import Workbook +from openpyxl.utils import get_column_letter + + +class DuplicateDataError(Exception): + def __init__(self, old, new, message): + self.old = old + self.new = new + self.message = message + + super().__init__(f'{message} old: {old} -> new: {new}') + + +def is_blank_row(row: List[str]) -> bool: + return not row or all(cell in ('', 'NR') for cell in row[1:]) + + +def merge_tables(str_tables: Iterable[str]) -> str: + data = dict() + tables = [list(csv.reader(table.splitlines())) for table in str_tables] + + for row in tables[0]: + if row: + 
data[row[0]] = row + + for table in tables: + for row in table: + if not is_blank_row(row): + if row[0] in data: + if not is_blank_row(data[row[0]]) and data[row[0]] != row: + raise DuplicateDataError(data[row[0]], row, f'Duplicate data for {row[0]}.') + data[row[0]] = row + + out = StringIO() + writer = csv.writer(out) + for row in tables[0]: + if not row: + continue + best_row = data[row[0]] + writer.writerow(best_row) + + return out.getvalue() + + +_RE_FOO_RESULTS_TABLE = re.compile(r'"(?P<tbl_name>\S+ Results) Table"') + + +def extract_tables(contents: str) -> Iterable[Tuple[str, str]]: + for m in _RE_FOO_RESULTS_TABLE.finditer(contents): + tbl_start = contents.find('\n\n', m.end()) + 1 + tbl_end = contents.find('\n\n', tbl_start) + yield (m['tbl_name'], contents[tbl_start:tbl_end]) + + +def main(files, out: str): + wb = Workbook() + files = [f.read() for f in files] + xy = list(extract_tables(files[0])) + tbls = map(extract_tables, files) + for tbl_group in zip(*tbls): + assert len(set(name for name, _ in tbl_group)) == 1 + ws = wb.create_sheet(tbl_group[0][0]) + + str_tables = (tbl for _, tbl in tbl_group) + merged = merge_tables(str_tables) + for row in csv.reader(merged.splitlines()): + ws.append(row) + for i, _ in enumerate(row): + ws.column_dimensions[get_column_letter(i + 1)].bestFit = True + + wb.remove(wb.active) + wb.save(out) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Merges multiple CPU2017 exec time csv results together') + parser.add_argument('-o', '--output', required=True, help='Where to write the output file') + parser.add_argument('csvs', nargs='+', help='The files to merge') + + args = parser.parse_args() + + with ExitStack() as stack: + files = [stack.enter_context(open(f, 'r')) for f in args.csvs] + + main(files, args.output) From b36ad9477302f51f8df25fe677c644595f2e6214 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Sun, 18 Jul 2021 22:57:15 -0700 Subject: [PATCH 28/45] Add GT edge metrics, repeat metrics 
for opt blocks --- util/gt_analysis/gt_cmp.py | 47 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 039939ca..06b2a804 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -86,6 +86,40 @@ def blk_relative_cost(nogt, gt) -> Tuple[int, int]: return no_sum, yes_sum +RP_GT_FINISHED = 'GraphTransRPNodeSuperiorityFinished' +ILP_GT_FINISHED = 'GraphTransILPNodeSuperiorityFinished' +RP_ILP_GT_FINISHED = 'GraphTransOccupancyPreservingILPNodeSuperiorityFinished' + + +def _edges_added_for_blk(blk: Block, fin_id: str) -> int: + if fin_id not in blk: + return 0 + return sum(ev['superior_edges'] for ev in blk[fin_id]) + + +def edges_added_for_blk(blk: Block) -> int: + return _edges_added_for_blk(blk, RP_GT_FINISHED) + _edges_added_for_blk(blk, ILP_GT_FINISHED) + _edges_added_for_blk(blk, RP_ILP_GT_FINISHED) + + +def _edges_removed_for_blk(blk: Block, fin_id: str) -> int: + if fin_id not in blk: + return 0 + try: + return sum(ev['removed_edges'] for ev in blk[fin_id]) + except KeyError: + return 0 + + +def edges_removed_for_blk(blk: Block) -> int: + return _edges_removed_for_blk(blk, RP_GT_FINISHED) + _edges_removed_for_blk(blk, ILP_GT_FINISHED) + _edges_removed_for_blk(blk, RP_ILP_GT_FINISHED) + + +def edges_rp_rejected_for_blk(blk: Block) -> int: + if RP_ILP_GT_FINISHED not in blk: + return 0 + return sum(ev['failed_rp'] for ev in blk[RP_ILP_GT_FINISHED]) + + def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int, total_compile_time_seconds): nogt_all, gt_all = nogt, gt @@ -109,6 +143,7 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int, total_compile_time_sec 'Total Compile Time (s) (all benchsuite) (No GT)': total_compile_time_seconds(nogt_all), 'Total Compile Time (s) (all benchsuite) (GT)': total_compile_time_seconds(gt_all), + 'Total Sched Time (No GT)': sched_time(nogt), 'Total Sched Time (GT)': sched_time(gt), 'Enum Time (No GT)': 
utils.sum_stat_for_all(enum_time_for_blk, nogt), @@ -118,12 +153,24 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int, total_compile_time_sec 'Heuristic Time (No GT)': compile_times.heuristic_time(nogt), 'Heuristic Time (GT)': compile_times.heuristic_time(gt), 'Total GT Time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), + 'Edges Added': utils.sum_stat_for_all(edges_added_for_blk, gt), + 'Edges Removed': utils.sum_stat_for_all(edges_removed_for_blk, gt), + 'Edges Rejected by RP': utils.sum_stat_for_all(edges_rp_rejected_for_blk, gt), 'Total Sched Time (opt. blks only) (No GT)': sched_time(nogt_opt), 'Total Sched Time (opt. blks only) (GT)': sched_time(gt_opt), 'Enum Time (opt. blocks only) (No GT)': utils.sum_stat_for_all(enum_time_for_blk, nogt_opt), 'Enum Time (opt. blocks only) (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt_opt), + 'Lower Bound Time (opt. blocks only) (No GT)': compile_times.first_lower_bound_time(nogt_opt), + 'Lower Bound Time (opt. blocks only) (GT)': compile_times.first_lower_bound_time(gt_opt), + 'Heuristic Time (opt. blocks only) (No GT)': compile_times.heuristic_time(nogt_opt), + 'Heuristic Time (opt. blocks only) (GT)': compile_times.heuristic_time(gt_opt), + 'Total GT Time (opt. blocks only)': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt_opt), + 'Edges Added (opt. blocks only)': utils.sum_stat_for_all(edges_added_for_blk, gt_opt), + 'Edges Removed (opt. blocks only)': utils.sum_stat_for_all(edges_removed_for_blk, gt_opt), + 'Edges Rejected by RP (opt. 
blocks only)': utils.sum_stat_for_all(edges_rp_rejected_for_blk, gt_opt), + 'Block Cost - Relative (No GT)': nogt_rel, 'Block Cost - Relative (GT)': gt_rel, 'Block Cost (No GT)': utils.sum_stat_for_all(block_stats.block_cost, nogt), From e087e9e9cbb5ee639163d1b860972c61a3a389c5 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Sun, 18 Jul 2021 23:06:07 -0700 Subject: [PATCH 29/45] Allow multiple heuristic results to exist If multiple exist, then we only re-evaluated the heuristic relative to the updated lower bounds after graph transformations, but we didn't actually recompute the heuristic result. --- util/analyze/lib/compile_times.py | 8 ++++++++ util/gt_analysis/gt_cmp.py | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/util/analyze/lib/compile_times.py b/util/analyze/lib/compile_times.py index 8af24bde..25ac986d 100755 --- a/util/analyze/lib/compile_times.py +++ b/util/analyze/lib/compile_times.py @@ -28,6 +28,14 @@ def heuristic_time(logs): return sum(heuristic_time_for_blk(b) for b in logs) +def first_heuristic_time_for_blk(blk: Block) -> int: + return blk['HeuristicResult'][0]['elapsed'] + + +def first_heuristic_time(logs): + return sum(first_heuristic_time_for_blk(b) for b in logs) + + def first_lower_bound_time_for_blk(blk: Block) -> int: return blk['CostLowerBound'][0]['elapsed'] diff --git a/util/gt_analysis/gt_cmp.py b/util/gt_analysis/gt_cmp.py index 06b2a804..3077b80b 100755 --- a/util/gt_analysis/gt_cmp.py +++ b/util/gt_analysis/gt_cmp.py @@ -150,8 +150,8 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int, total_compile_time_sec 'Enum Time (GT)': utils.sum_stat_for_all(enum_time_for_blk, gt), 'Lower Bound Time (No GT)': compile_times.first_lower_bound_time(nogt), 'Lower Bound Time (GT)': compile_times.first_lower_bound_time(gt), - 'Heuristic Time (No GT)': compile_times.heuristic_time(nogt), - 'Heuristic Time (GT)': compile_times.heuristic_time(gt), + 'Heuristic Time (No GT)': 
compile_times.first_heuristic_time(nogt), + 'Heuristic Time (GT)': compile_times.first_heuristic_time(gt), 'Total GT Time': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt), 'Edges Added': utils.sum_stat_for_all(edges_added_for_blk, gt), 'Edges Removed': utils.sum_stat_for_all(edges_removed_for_blk, gt), @@ -164,8 +164,8 @@ def compute_stats(nogt: Logs, gt: Logs, *, pass_num: int, total_compile_time_sec 'Lower Bound Time (opt. blocks only) (No GT)': compile_times.first_lower_bound_time(nogt_opt), 'Lower Bound Time (opt. blocks only) (GT)': compile_times.first_lower_bound_time(gt_opt), - 'Heuristic Time (opt. blocks only) (No GT)': compile_times.heuristic_time(nogt_opt), - 'Heuristic Time (opt. blocks only) (GT)': compile_times.heuristic_time(gt_opt), + 'Heuristic Time (opt. blocks only) (No GT)': compile_times.first_heuristic_time(nogt_opt), + 'Heuristic Time (opt. blocks only) (GT)': compile_times.first_heuristic_time(gt_opt), 'Total GT Time (opt. blocks only)': utils.sum_stat_for_all(total_gt_elapsed_for_blk, gt_opt), 'Edges Added (opt. blocks only)': utils.sum_stat_for_all(edges_added_for_blk, gt_opt), 'Edges Removed (opt. 
blocks only)': utils.sum_stat_for_all(edges_removed_for_blk, gt_opt), From a2e720241acf76dbc4aac49edebe74c557479d26 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Sun, 18 Jul 2021 23:09:46 -0700 Subject: [PATCH 30/45] Fix validation-test for multiple lower bounds --- util/misc/validation-test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/misc/validation-test.py b/util/misc/validation-test.py index c50d8858..cb33705b 100755 --- a/util/misc/validation-test.py +++ b/util/misc/validation-test.py @@ -125,7 +125,7 @@ def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: is_optimal = best_result['optimal'] except KeyError: try: - best_result = block.single('HeuristicResult') + best_result = block['HeuristicResult'][-1] is_optimal = best_result['cost'] == 0 or \ 'INFO: Marking SLIL list schedule as optimal due to zero PERP.' in block.raw_log except KeyError: @@ -138,7 +138,7 @@ def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: benchmark=block.benchmark, num_instructions=block.single('ProcessDag')['num_instructions'], pass_num=pass_num(block), - lower_bound=block.single('CostLowerBound')['cost'], + lower_bound=block['CostLowerBound'][-1]['cost'], relative_cost=best_result['cost'], length=best_result['length'], is_optimal=is_optimal, From 01bdd211b7e8126d702192e0e2fee67ed7e4ce34 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Mon, 19 Jul 2021 11:25:06 -0700 Subject: [PATCH 31/45] Fix validation-test args --- util/misc/validation-test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/misc/validation-test.py b/util/misc/validation-test.py index cb33705b..40ee0706 100755 --- a/util/misc/validation-test.py +++ b/util/misc/validation-test.py @@ -337,9 +337,9 @@ def print_small_summary(mismatches: List[Mismatch]): parser.add_argument('-q', '--quiet', action='store_true', help='Only print mismatch info, and only if there are mismatches') - 
parser.add_argument('--no-summarize-largest-cost-difference', action='store_false', + parser.add_argument('--no-summarize-largest-cost-difference', action='store_true', help='Do not summarize the mismatches with the biggest difference in cost') - parser.add_argument('--no-summarize-smallest-mismatches', action='store_false', + parser.add_argument('--no-summarize-smallest-mismatches', action='store_true', help='Do not summarize the mismatches with the smallest region size') parser.add_argument('--num-largest-cost-mismatches-print', type=int, default=10, From 7a2702c4b7a0dad2e0d3b12ce3443e0648750b17 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Mon, 19 Jul 2021 11:26:57 -0700 Subject: [PATCH 32/45] Include benchmark name in validation-test --- util/misc/validation-test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/util/misc/validation-test.py b/util/misc/validation-test.py index 40ee0706..e5b50f43 100755 --- a/util/misc/validation-test.py +++ b/util/misc/validation-test.py @@ -284,6 +284,7 @@ def print_block_info(index: int, mismatch: Mismatch): print(dedent(f'''\ {index}: Block Name: {mismatch.dag_id} + Benchmark: {mismatch.benchmark} Num Instructions: {mismatch.region_size} Length: {mismatch.lengths[0]} --> {mismatch.lengths[1]} Difference in cost: {cost_diff} From 49ecbacad936ef6aed674f123e394384e88620a4 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 22 Jul 2021 22:01:10 -0700 Subject: [PATCH 33/45] Support multi-run modes in combine-exec.py --- util/CPU2006/combine-exec.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/util/CPU2006/combine-exec.py b/util/CPU2006/combine-exec.py index a100f2a6..ff96823b 100755 --- a/util/CPU2006/combine-exec.py +++ b/util/CPU2006/combine-exec.py @@ -6,7 +6,8 @@ import sys import argparse from contextlib import ExitStack -from typing import Iterable, List, Tuple +from typing import Dict, Iterable, List, Tuple +from collections import Counter from openpyxl import 
Workbook from openpyxl.utils import get_column_letter @@ -25,28 +26,33 @@ def is_blank_row(row: List[str]) -> bool: def merge_tables(str_tables: Iterable[str]) -> str: - data = dict() + data: Dict[str, List[List[str]]] = dict() tables = [list(csv.reader(table.splitlines())) for table in str_tables] for row in tables[0]: if row: - data[row[0]] = row + data.setdefault(row[0], []).append(row) for table in tables: + nth: Dict[str, int] = Counter() for row in table: if not is_blank_row(row): + index = nth[row[0]] if row[0] in data: - if not is_blank_row(data[row[0]]) and data[row[0]] != row: - raise DuplicateDataError(data[row[0]], row, f'Duplicate data for {row[0]}.') - data[row[0]] = row + if not is_blank_row(data[row[0]][index]) and data[row[0]][index] != row: + raise DuplicateDataError(data[row[0]][index], row, f'Duplicate data for {row[0]}.') + data[row[0]][index] = row + nth[row[0]] += 1 out = StringIO() writer = csv.writer(out) + nth: Dict[str, int] = Counter() for row in tables[0]: if not row: continue - best_row = data[row[0]] - writer.writerow(best_row) + index = nth[row[0]] + writer.writerow(data[row[0]][index]) + nth[row[0]] += 1 return out.getvalue() @@ -64,7 +70,6 @@ def extract_tables(contents: str) -> Iterable[Tuple[str, str]]: def main(files, out: str): wb = Workbook() files = [f.read() for f in files] - xy = list(extract_tables(files[0])) tbls = map(extract_tables, files) for tbl_group in zip(*tbls): assert len(set(name for name, _ in tbl_group)) == 1 From 35de44cc0669665d9dd9b9644def8ea723747651 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Sun, 25 Jul 2021 17:53:14 -0700 Subject: [PATCH 34/45] Update validation-test.py for Adjusted PERP SCF --- util/misc/validation-test.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/util/misc/validation-test.py b/util/misc/validation-test.py index e5b50f43..4d23d1a2 100755 --- a/util/misc/validation-test.py +++ b/util/misc/validation-test.py @@ -12,6 +12,12 @@ 
import analyze from analyze import Logs +# Some reference point to compute occupancies against. +# This would ideally be the maximum possible occupancy so that the .cost property will never be negative +OCCUPANCY_REFERENCE_POINT = 10 + +SPILL_COST_WEIGHT = 0 + @dataclass class DagInfo: @@ -24,10 +30,20 @@ class DagInfo: relative_cost: int length: int is_optimal: bool + # Spill cost is not absolute for SCF = TARGET. By recording the baseline, we can adjust the costs. + target_occupancy: Optional[int] + spill_cost: int @property def cost(self): - return self.lower_bound + self.relative_cost + cost = self.lower_bound + self.relative_cost + if self.target_occupancy is not None: + # TargetOcc - SC is a "complement"-like operation, meaning that it undoes itself. + actual_occupancy = self.target_occupancy - self.spill_cost + absolute_spill_cost = OCCUPANCY_REFERENCE_POINT - actual_occupancy + cost += SPILL_COST_WEIGHT * absolute_spill_cost + + return cost class MismatchKind(Enum): @@ -133,6 +149,8 @@ def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: print(block.raw_log) exit(2) + target_occ = block.single('TargetOccupancy')['target'] if 'TargetOccupancy' in block else None + dags.setdefault(block.name, []).append(DagInfo( id=block.name, benchmark=block.benchmark, @@ -142,6 +160,8 @@ def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: relative_cost=best_result['cost'], length=best_result['length'], is_optimal=is_optimal, + spill_cost=best_result['spill_cost'], + target_occupancy=target_occ, )) for k, block_passes in dags.items(): @@ -338,6 +358,8 @@ def print_small_summary(mismatches: List[Mismatch]): parser.add_argument('-q', '--quiet', action='store_true', help='Only print mismatch info, and only if there are mismatches') + parser.add_argument('--scw', '--spill-cost-weight', type=int, required=True, + help='The weight of the spill cost in the cost calculation. Only relevant if the reported spill costs are not absolute (e.g. 
SCF = TARGET); put any value otherwise.', dest='spill_cost_weight', metavar='SCW') parser.add_argument('--no-summarize-largest-cost-difference', action='store_true', help='Do not summarize the mismatches with the biggest difference in cost') parser.add_argument('--no-summarize-smallest-mismatches', action='store_true', @@ -358,6 +380,7 @@ def print_small_summary(mismatches: List[Mismatch]): NUM_SMALLEST_BLOCKS_PRINT = args.num_smallest_mismatches_print MISSING_LOWER_BOUND_DUMP_COUNT = args.missing_lb_dump_count MISSING_LOWER_BOUND_DUMP_LINES = args.missing_lb_dump_lines + SPILL_COST_WEIGHT = args.spill_cost_weight main( args.first, args.second, From 4e4174207816f29f71811cbd53007ba1c54a3993 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Sun, 25 Jul 2021 17:58:11 -0700 Subject: [PATCH 35/45] Have validation-test.py output which block failed --- util/misc/validation-test.py | 52 ++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/util/misc/validation-test.py b/util/misc/validation-test.py index 4d23d1a2..3709170e 100755 --- a/util/misc/validation-test.py +++ b/util/misc/validation-test.py @@ -10,7 +10,7 @@ import argparse import analyze -from analyze import Logs +from analyze import Logs, Block # Some reference point to compute occupancies against. 
# This would ideally be the maximum possible occupancy so that the .cost property will never be negative @@ -19,6 +19,14 @@ SPILL_COST_WEIGHT = 0 +class BlockProcessingError(Exception): + block: Block + + def __init__(self, message: str, block: Block): + self.block = block + super().__init__(f'{message}:\n{block.raw_log}') + + @dataclass class DagInfo: id: str @@ -137,32 +145,30 @@ def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: for block in blocks: try: - best_result = block.single('BestResult') - is_optimal = best_result['optimal'] - except KeyError: try: + best_result = block.single('BestResult') + is_optimal = best_result['optimal'] + except KeyError: best_result = block['HeuristicResult'][-1] is_optimal = best_result['cost'] == 0 or \ 'INFO: Marking SLIL list schedule as optimal due to zero PERP.' in block.raw_log - except KeyError: - print('ERROR: unable to extract BestResult or HeuristicResult from block', file=sys.stderr) - print(block.raw_log) - exit(2) - - target_occ = block.single('TargetOccupancy')['target'] if 'TargetOccupancy' in block else None - - dags.setdefault(block.name, []).append(DagInfo( - id=block.name, - benchmark=block.benchmark, - num_instructions=block.single('ProcessDag')['num_instructions'], - pass_num=pass_num(block), - lower_bound=block['CostLowerBound'][-1]['cost'], - relative_cost=best_result['cost'], - length=best_result['length'], - is_optimal=is_optimal, - spill_cost=best_result['spill_cost'], - target_occupancy=target_occ, - )) + + target_occ = block.single('TargetOccupancy')['target'] if 'TargetOccupancy' in block else None + + dags.setdefault(block.name, []).append(DagInfo( + id=block.name, + benchmark=block.benchmark, + num_instructions=block.single('ProcessDag')['num_instructions'], + pass_num=pass_num(block), + lower_bound=block['CostLowerBound'][-1]['cost'], + relative_cost=best_result['cost'], + length=best_result['length'], + is_optimal=is_optimal, + spill_cost=best_result['spill_cost'], + 
target_occupancy=target_occ, + )) + except Exception as ex: + raise BlockProcessingError('Failed when processing block', block) from ex for k, block_passes in dags.items(): # Safe to modify dags while iterating because we use .items() to get a copy From d62e77b968c932a42aa63bfc6498c42c141c10ca Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Sun, 25 Jul 2021 18:18:07 -0700 Subject: [PATCH 36/45] Update validation-test.py to get spill cost --- util/misc/validation-test.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/util/misc/validation-test.py b/util/misc/validation-test.py index 3709170e..02ded1e3 100755 --- a/util/misc/validation-test.py +++ b/util/misc/validation-test.py @@ -126,6 +126,15 @@ def pass_num(block) -> int: return block.get('PassFinished', DEFAULT_PASS)[0]['num'] +def try_first(block: Block, *event_ids): + for index, event_id in enumerate(event_ids): + try: + return block[event_id] + except KeyError: + if index == len(event_ids) - 1: + raise + + def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: dags = {} @@ -145,13 +154,10 @@ def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: for block in blocks: try: - try: - best_result = block.single('BestResult') - is_optimal = best_result['optimal'] - except KeyError: - best_result = block['HeuristicResult'][-1] - is_optimal = best_result['cost'] == 0 or \ - 'INFO: Marking SLIL list schedule as optimal due to zero PERP.' in block.raw_log + best_result = try_first(block, 'BestResult', 'HeuristicResult')[-1] + best_result_info = try_first(block, 'DagSolvedOptimally', 'DagTimedOut', 'HeuristicResult')[-1] + is_optimal = best_result.get('optimal', False) or best_result['cost'] == 0 or \ + 'INFO: Marking SLIL list schedule as optimal due to zero PERP.' 
in block.raw_log target_occ = block.single('TargetOccupancy')['target'] if 'TargetOccupancy' in block else None @@ -164,7 +170,7 @@ def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: relative_cost=best_result['cost'], length=best_result['length'], is_optimal=is_optimal, - spill_cost=best_result['spill_cost'], + spill_cost=best_result_info['spill_cost'], target_occupancy=target_occ, )) except Exception as ex: From c889dae50d1fbf7aa42bb2d9b35b3ea152bb4f2b Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 29 Jul 2021 19:04:09 -0700 Subject: [PATCH 37/45] Revert "Update validation-test.py for Adjusted PERP SCF" This reverts commit 35de44cc0669665d9dd9b9644def8ea723747651. --- util/misc/validation-test.py | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/util/misc/validation-test.py b/util/misc/validation-test.py index 02ded1e3..c28276b9 100755 --- a/util/misc/validation-test.py +++ b/util/misc/validation-test.py @@ -12,12 +12,6 @@ import analyze from analyze import Logs, Block -# Some reference point to compute occupancies against. -# This would ideally be the maximum possible occupancy so that the .cost property will never be negative -OCCUPANCY_REFERENCE_POINT = 10 - -SPILL_COST_WEIGHT = 0 - class BlockProcessingError(Exception): block: Block @@ -38,20 +32,10 @@ class DagInfo: relative_cost: int length: int is_optimal: bool - # Spill cost is not absolute for SCF = TARGET. By recording the baseline, we can adjust the costs. - target_occupancy: Optional[int] - spill_cost: int @property def cost(self): - cost = self.lower_bound + self.relative_cost - if self.target_occupancy is not None: - # TargetOcc - SC is a "complement"-like operation, meaning that it undoes itself. 
- actual_occupancy = self.target_occupancy - self.spill_cost - absolute_spill_cost = OCCUPANCY_REFERENCE_POINT - actual_occupancy - cost += SPILL_COST_WEIGHT * absolute_spill_cost - - return cost + return self.lower_bound + self.relative_cost class MismatchKind(Enum): @@ -155,12 +139,9 @@ def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: for block in blocks: try: best_result = try_first(block, 'BestResult', 'HeuristicResult')[-1] - best_result_info = try_first(block, 'DagSolvedOptimally', 'DagTimedOut', 'HeuristicResult')[-1] is_optimal = best_result.get('optimal', False) or best_result['cost'] == 0 or \ 'INFO: Marking SLIL list schedule as optimal due to zero PERP.' in block.raw_log - target_occ = block.single('TargetOccupancy')['target'] if 'TargetOccupancy' in block else None - dags.setdefault(block.name, []).append(DagInfo( id=block.name, benchmark=block.benchmark, @@ -170,8 +151,6 @@ def extract_dag_info(logs: Logs) -> Dict[str, List[List[DagInfo]]]: relative_cost=best_result['cost'], length=best_result['length'], is_optimal=is_optimal, - spill_cost=best_result_info['spill_cost'], - target_occupancy=target_occ, )) except Exception as ex: raise BlockProcessingError('Failed when processing block', block) from ex @@ -370,8 +349,6 @@ def print_small_summary(mismatches: List[Mismatch]): parser.add_argument('-q', '--quiet', action='store_true', help='Only print mismatch info, and only if there are mismatches') - parser.add_argument('--scw', '--spill-cost-weight', type=int, required=True, - help='The weight of the spill cost in the cost calculation. Only relevant if the reported spill costs are not absolute (e.g. 
SCF = TARGET); put any value otherwise.', dest='spill_cost_weight', metavar='SCW') parser.add_argument('--no-summarize-largest-cost-difference', action='store_true', help='Do not summarize the mismatches with the biggest difference in cost') parser.add_argument('--no-summarize-smallest-mismatches', action='store_true', @@ -392,7 +369,6 @@ def print_small_summary(mismatches: List[Mismatch]): NUM_SMALLEST_BLOCKS_PRINT = args.num_smallest_mismatches_print MISSING_LOWER_BOUND_DUMP_COUNT = args.missing_lb_dump_count MISSING_LOWER_BOUND_DUMP_LINES = args.missing_lb_dump_lines - SPILL_COST_WEIGHT = args.spill_cost_weight main( args.first, args.second, From 38b4cff07b6c837d2f0715550bd356c78859ba84 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Wed, 4 Aug 2021 15:42:55 -0700 Subject: [PATCH 38/45] Add functions to gather occupancy info --- util/analyze/lib/func_stats.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 util/analyze/lib/func_stats.py diff --git a/util/analyze/lib/func_stats.py b/util/analyze/lib/func_stats.py new file mode 100644 index 00000000..6802a6b0 --- /dev/null +++ b/util/analyze/lib/func_stats.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 + +import re +from itertools import chain +from typing import Iterable, List + +from analyze import Block + +''' +Function-level stats (not Block, Logs, or Benchmark level) +''' + +_RE_OCCUPANCY = re.compile(r'Final occupancy for function (?P\S+):(?P\d+)') + + +def _occupancy_info_in_block_log(block: Block) -> Iterable[int]: + for m in _RE_OCCUPANCY.finditer(block.raw_log): + yield int(m['value']) + + +def function_occupancy_info(logs: Iterable[Block]) -> List[int]: + return list(chain.from_iterable(map(_occupancy_info_in_block_log, logs))) + + +def avg_occupancy(logs: Iterable[Block]) -> float: + occ_info = function_occupancy_info(logs) + return sum(occ_info) / len(occ_info) if occ_info else 0.0 From 0976ac6bcddfecfd8ad972d1c2a3fc2a6f56eb8a Mon Sep 17 00:00:00 2001 From: Justin 
Bassett Date: Wed, 4 Aug 2021 16:40:23 -0700 Subject: [PATCH 39/45] Fix import utils --- util/analyze/imports/import_cpu2006.py | 5 +- util/analyze/imports/import_utils.py | 145 ++++++++++++++----------- 2 files changed, 82 insertions(+), 68 deletions(-) diff --git a/util/analyze/imports/import_cpu2006.py b/util/analyze/imports/import_cpu2006.py index c8b18363..23edd8a3 100755 --- a/util/analyze/imports/import_cpu2006.py +++ b/util/analyze/imports/import_cpu2006.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import os -import re from . import import_utils @@ -13,8 +12,8 @@ def parse(file): with open(file, 'r') as f: return import_utils.parse_multi_bench_file( f.read(), - benchstart=re.compile(r'Building (?P<name>\S*)'), - filename=re.compile(r'/[fc]lang\b.*\s(\S+\.\S+)\n')) + benchstart=r'Building (?P<name>\S*)', + filename=r'/[fc]lang\b.*\s(\S+\.\S+)\n') if __name__ == '__main__': diff --git a/util/analyze/imports/import_utils.py b/util/analyze/imports/import_utils.py index 3454cb67..5b5baa22 100644 --- a/util/analyze/imports/import_utils.py +++ b/util/analyze/imports/import_utils.py @@ -1,13 +1,15 @@ -import pickle -import json import itertools +import json +import pickle import re import sys -from collections import namedtuple +from dataclasses import dataclass +from typing import List, Match, Optional, Pattern, Union -from .._types import Logs, Benchmark, Block +from .._types import Benchmark, Block, Logs -_RE_REGION_INFO = re.compile(r'EVENT:.*ProcessDag.*"name": "(?P[^"]*)"') +_REGION_DELIMITER = 'INFO: ********** Opt Scheduling **********' +_RE_REGION_DELIMITER = re.compile(re.escape(_REGION_DELIMITER)) def import_main(parsefn, *, description): @@ -24,18 +26,39 @@ def import_main(parsefn, *, description): pickle.dump(result, f) -def parse_multi_bench_file(logtext, *, benchstart, filename=None): +def parse_multi_bench_file(logtext: str, *, benchstart: Union[Pattern, str], filename: Optional[Union[Pattern, str]] = None): + if filename is not None: + filename = 
re.compile(filename) + benchstart = re.compile(benchstart) + + def parse_bench(benchm: Match, nextm: Union[Match, _DummyEnd], is_first: bool = False): + # The RE can specify any extra properties. + info = benchm.groupdict() + # If this is the first benchmark in the file, we want to start from the + # start of the file so that we don't lose any information. + start = 0 if is_first else benchm.start() + end = nextm.start() + return _parse_benchmark(info, logtext, + start, end, + filenamere=filename) + + bench_matches = list(benchstart.finditer(logtext)) benchmarks = [] - for benchm, nextm in _splititer(benchstart, logtext): - bench = _parse_benchmark(benchm.groupdict(), logtext, - benchm.end(), nextm.start(), - filenamere=filename) - benchmarks.append(bench) + + is_first: bool = True + for benchm, nextm in zip( + bench_matches, + [*bench_matches[1:], _DummyEnd(len(logtext))] + ): + benchmarks.append(parse_bench(benchm, nextm, is_first)) + is_first = False return Logs(benchmarks) -def parse_single_bench_file(logtext, *, benchname, filename=None): +def parse_single_bench_file(logtext, *, benchname, filename: Optional[Union[Pattern, str]] = None): + if filename is not None: + filename = re.compile(filename) return Logs([ _parse_benchmark( {'name': benchname}, @@ -45,21 +68,10 @@ def parse_single_bench_file(logtext, *, benchname, filename=None): ]) -_FileInfo = namedtuple('_FileInfo', ('filename', 'from_pos')) - - -def _each_cons(iterable, n): - ''' - Iterates over each consecutive n items of the iterable. 
- - _each_cons((1, 2, 3, 4), 2) # (1, 2), (2, 3), (3, 4) - ''' - iters = [None] * n - iters[0] = iter(iterable) - for i in range(1, n): - iters[i - 1], iters[i] = itertools.tee(iters[i - 1]) - next(iters[i], None) - return zip(*iters) +@dataclass +class _FileInfo: + filename: Optional[str] + from_pos: int class _DummyEnd: @@ -73,58 +85,59 @@ def end(self): return self._end -def _splititer(regex, text, pos=0, endpos=None): - ''' - 'Splits' the string by the regular expression, using an iterable. - Returns both where the regex matches and where it matched next (or the end). - ''' - if endpos is None: - endpos = len(text) - 1 +def _filename_info(filenamere: Optional[Pattern], logtext: str, start: int, end: int) -> List[_FileInfo]: + if filenamere is None: + filenamere = re.compile(r'.^') # RE that doesn't match anything + files = [] - return _each_cons( - itertools.chain(regex.finditer(text, pos, endpos), - (_DummyEnd(endpos + 1),)), - 2 - ) + for filem in filenamere.finditer(logtext, start, end): + filename = filem.group(1) + filestart = filem.end() + files.append(_FileInfo(filename=filename, from_pos=filestart)) + return files -def _parse_benchmark(info, logtext: str, start, end, *, filenamere): - NAME = info['name'] + +def _parse_benchmark(info: dict, logtext: str, start: int, end: int, *, filenamere: Optional[Pattern]): + BENCHNAME = info['name'] blocks = [] - if filenamere and filenamere.search(logtext, start, end): - files = [ - *(_FileInfo(filename=r.group(1), from_pos=r.end()) - for r in filenamere.finditer(logtext, start, end)), - _FileInfo(filename=None, from_pos=len(logtext)), - ][::-1] - else: - files = [ - _FileInfo(filename=None, from_pos=start), - _FileInfo(filename=None, from_pos=len(logtext)), - ][::-1] + files: List[_FileInfo] = _filename_info(filenamere, logtext, start, end) + if not files: + # We have an unknown file starting from the very beginning + files = [_FileInfo(filename=None, from_pos=start)] + + # Allow us to peek ahead by giving a dummy 
"file" at the end which will never match a block + files.append(_FileInfo(filename=None, from_pos=end)) + assert len(files) >= 2 + file_pos = 0 + + block_matches1, block_matches2 = itertools.tee(_RE_REGION_DELIMITER.finditer(logtext, start, end)) + next(block_matches2) # Drop first + block_matches2 = itertools.chain(block_matches2, (_DummyEnd(end),)) blocks = [] - for regionm, nextm in _splititer(_RE_REGION_INFO, logtext, start, end): - assert regionm.end() > files[-1].from_pos - if regionm.end() > files[-2].from_pos: - files.pop() + is_first = True + for regionm, nextm in zip(block_matches1, block_matches2): + region_start = regionm.end() + if region_start > files[file_pos + 1].from_pos: + file_pos += 1 + + assert region_start > files[file_pos].from_pos - try: - filename = files[-1].filename - except NameError: - filename = None + filename = files[file_pos].filename if files[file_pos] else None regioninfo = { - 'name': regionm['name'], 'file': filename, - 'benchmark': NAME, + 'benchmark': BENCHNAME, } - block = _parse_block(regioninfo, logtext, - regionm.start() - 1, nextm.start()) - blocks.append(block) + blk_start = start if is_first else regionm.start() + blk_end = nextm.start() + blocks.append(_parse_block(regioninfo, logtext, + blk_start, blk_end)) + is_first = False return Benchmark(info, blocks) @@ -132,6 +145,8 @@ def _parse_benchmark(info, logtext: str, start, end, *, filenamere): def _parse_block(info, logtext: str, start, end): events = _parse_events(logtext, start, end) raw_log = logtext[start:end] + assert 'ProcessDag' in events + info['name'] = events['ProcessDag'][0]['name'] return Block(info, raw_log, events) From da627e1d992181d2da0c58351fcfadae2b53d2f9 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Wed, 4 Aug 2021 16:55:08 -0700 Subject: [PATCH 40/45] Add main function to func_stats --- util/analyze/lib/block_stats.py | 2 +- util/analyze/lib/func_stats.py | 28 +++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) 
mode change 100644 => 100755 util/analyze/lib/func_stats.py diff --git a/util/analyze/lib/block_stats.py b/util/analyze/lib/block_stats.py index 56ab7089..cfbc3814 100755 --- a/util/analyze/lib/block_stats.py +++ b/util/analyze/lib/block_stats.py @@ -88,4 +88,4 @@ def compute_block_stats(logs: Logs): results = utils.foreach_bench(compute_block_stats, args.logs) - args.format(result) + args.format(results) diff --git a/util/analyze/lib/func_stats.py b/util/analyze/lib/func_stats.py old mode 100644 new mode 100755 index 6802a6b0..d9d88daf --- a/util/analyze/lib/func_stats.py +++ b/util/analyze/lib/func_stats.py @@ -1,10 +1,13 @@ #!/usr/bin/env python3 +import argparse import re +import sys from itertools import chain from typing import Iterable, List -from analyze import Block +import analyze +from analyze import Block, ioutils, utils ''' Function-level stats (not Block, Logs, or Benchmark level) @@ -25,3 +28,26 @@ def function_occupancy_info(logs: Iterable[Block]) -> List[int]: def avg_occupancy(logs: Iterable[Block]) -> float: occ_info = function_occupancy_info(logs) return sum(occ_info) / len(occ_info) if occ_info else 0.0 + + +def raw_main(argv: List[str] = []): + parser = argparse.ArgumentParser( + description='Computes the block stats for the logs') + parser.add_argument('--stat', required=True, choices=('occ',), + help='Which stat to compute') + parser.add_argument('logs', help='The logs to analyze') + ioutils.add_output_format_arg(parser) + args = analyze.parse_args(parser, 'logs') + + STATS = { + 'occ': ('Average Occupancy', avg_occupancy), + } + label, f = STATS[args.stat] + + results = utils.foreach_bench(lambda bench: {label: f(bench)}, args.logs) + + args.format(sys.stdout, results) + + +if __name__ == '__main__': + raw_main(sys.argv) From 208fa9f3fa531525d482fbdf08cb92f267ac806c Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Sat, 7 Aug 2021 14:02:48 -0700 Subject: [PATCH 41/45] Add spill func-level info functions --- util/analyze/lib/func_stats.py 
| 61 ++++++++++++++++++++++++++-------- util/analyze/utils.py | 17 ++++++++++ 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/util/analyze/lib/func_stats.py b/util/analyze/lib/func_stats.py index d9d88daf..43b3c2d6 100755 --- a/util/analyze/lib/func_stats.py +++ b/util/analyze/lib/func_stats.py @@ -4,7 +4,7 @@ import re import sys from itertools import chain -from typing import Iterable, List +from typing import Callable, Iterable, List, Pattern, Tuple import analyze from analyze import Block, ioutils, utils @@ -14,40 +14,73 @@ ''' _RE_OCCUPANCY = re.compile(r'Final occupancy for function (?P<name>\S+):(?P<value>\d+)') +_RE_SPILLS = re.compile(r'Function: (?P<name>\S*?)\nGREEDY RA: Number of spilled live ranges: (?P<value>\d+)') +_RE_SPILLS_WEIGHTED = re.compile(r'SC in Function (?P<name>\S*?) (?P<value>-?\d+)') -def _occupancy_info_in_block_log(block: Block) -> Iterable[int]: - for m in _RE_OCCUPANCY.finditer(block.raw_log): - yield int(m['value']) +def compute_avg_values(fn_info: List[Tuple[str, int]], *, fn_filter: Callable[[str, int], bool] = lambda k, v: True) -> float: + return utils.average((v for k, v in fn_info if fn_filter(k, v)), len(fn_info)) -def function_occupancy_info(logs: Iterable[Block]) -> List[int]: - return list(chain.from_iterable(map(_occupancy_info_in_block_log, logs))) +def _fn_re_info(re: Pattern, logs: Iterable[Block], key='name', value='value') -> Iterable[Tuple[str, int]]: + for m in chain.from_iterable(re.finditer(blk.raw_log) for blk in logs): + yield (m[key], int(m[value])) -def avg_occupancy(logs: Iterable[Block]) -> float: - occ_info = function_occupancy_info(logs) - return sum(occ_info) / len(occ_info) if occ_info else 0.0 +def fn_occupancy_info(logs: Iterable[Block]) -> List[Tuple[str, int]]: + return list(_fn_re_info(_RE_OCCUPANCY, logs)) + + +def avg_occupancy(logs: Iterable[Block], *, fn_filter: Callable[[str, int], bool] = lambda k, v: True) -> float: + occ_info = fn_occupancy_info(logs) + return compute_avg_values(occ_info, fn_filter=fn_filter) + 
+ +def fn_spill_info(logs: Iterable[Block]) -> List[Tuple[str, int]]: + return list(_fn_re_info(_RE_SPILLS, logs)) + + +def fn_weighted_spill_info(logs: Iterable[Block]) -> List[Tuple[str, int]]: + return list(_fn_re_info(_RE_SPILLS_WEIGHTED, logs)) + + +def total_spills(logs: Iterable[Block], *, fn_filter: Callable[[str, int], bool] = lambda k, v: True) -> int: + return sum(v for k, v in fn_spill_info(logs) if fn_filter(k, v)) + + +def total_weighted_spills(logs: Iterable[Block], *, fn_filter: Callable[[str, int], bool] = lambda k, v: True) -> int: + return sum(v for k, v in fn_weighted_spill_info(logs) if fn_filter(k, v)) def raw_main(argv: List[str] = []): parser = argparse.ArgumentParser( description='Computes the block stats for the logs') - parser.add_argument('--stat', required=True, choices=('occ',), + parser.add_argument('--stat', required=True, choices=('occ', 'spills', 'weighted-spills'), help='Which stat to compute') - parser.add_argument('logs', help='The logs to analyze') + parser.add_argument('--hot-only', help='A file with a space-separated list of functions to consider in the count') ioutils.add_output_format_arg(parser) - args = analyze.parse_args(parser, 'logs') + parser.add_argument('logs', help='The logs to analyze') + args = analyze.parse_args(parser, 'logs', args=argv) + + if args.hot_only: + with open(args.hot_only, 'r') as f: + contents = f.read() + fns = set(contents.split()) + def fn_filter(k, v): return k in fns + else: + def fn_filter(k, v): return True STATS = { 'occ': ('Average Occupancy', avg_occupancy), + 'spills': ('Spill Count', total_spills), + 'weighted-spills': ('Weighted Spill Count', total_weighted_spills), } label, f = STATS[args.stat] - results = utils.foreach_bench(lambda bench: {label: f(bench)}, args.logs) + results = utils.foreach_bench(lambda bench: {label: f(bench, fn_filter=fn_filter)}, args.logs) args.format(sys.stdout, results) if __name__ == '__main__': - raw_main(sys.argv) + raw_main(None) # Default to sys.argv 
diff --git a/util/analyze/utils.py b/util/analyze/utils.py index 39f68a80..b92355f0 100644 --- a/util/analyze/utils.py +++ b/util/analyze/utils.py @@ -1,3 +1,4 @@ +from typing import Iterable from ._types import * @@ -77,6 +78,7 @@ def zipped_keep_blocks_if(*logs, pred): except StopIteration: # There was nothing in zip(*logs)... old_pred = pred + def new_pred(*blks): try: return old_pred(*blks) @@ -111,3 +113,18 @@ def zip_benchmarks_if(*benchmarks): def sum_stat_for_all(stat, logs: Logs) -> int: return sum(stat(blk) for blk in logs) + + +def average(xs: Iterable[int], count=None) -> float: + try: + size = count if count is not None else len(xs) + return sum(xs) / size if size else 0.0 + except TypeError: + pass + + acc = 0 + num = 0 + for x in xs: + acc += x + num += 1 + return acc / num if num else 0.0 From cd0cb394d29c13365c7cd783b3f4953c2e9853b0 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 10 Aug 2021 19:45:56 -0700 Subject: [PATCH 42/45] Add script to extract spill stats via regex Significantly faster than actually parsing --- util/misc/json2infolog.py | 0 util/misc/raw-spill-counts.py | 57 +++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) mode change 100644 => 100755 util/misc/json2infolog.py create mode 100755 util/misc/raw-spill-counts.py diff --git a/util/misc/json2infolog.py b/util/misc/json2infolog.py old mode 100644 new mode 100755 diff --git a/util/misc/raw-spill-counts.py b/util/misc/raw-spill-counts.py new file mode 100755 index 00000000..48426b46 --- /dev/null +++ b/util/misc/raw-spill-counts.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +import argparse +import enum +import re +import sys +from typing import IO, List +from pathlib import Path + + +class argfile: + def __init__(self, default: IO, filename: Path, mode: str = 'r'): + self.__file = default if filename == '-' else open(filename, mode) + self.__should_close = filename != '-' + + def __enter__(self) -> IO: + return self.__file + + def __exit__(self, exc_type, 
exc_value, exc_traceback): + self.close() + + def close(self): + if self.__should_close: + self.__file.close() + + +class SpillStat(enum.Enum): + RAW = re.compile(r'GREEDY RA: Number of spilled live ranges: ([0-9]+)') + WEIGHTED = re.compile(r'SC in Function \S+ ([0-9])+') + + +def sum_stat(infile: IO, r: re.Match) -> int: + return sum( + sum(int(x) for x in r.findall(line)) for line in infile + ) + + +def main(infile: IO, outfile: IO, which: SpillStat = SpillStat.RAW): + spill_count = sum_stat(infile, which.value) + print(spill_count, file=outfile) + + +def raw_main(argv: List[str]) -> None: + parser = argparse.ArgumentParser(description='Extract spill counts') + parser.add_argument('--which', default='raw', choices=('weighted', 'raw'), + help='Whether to extract weighted or raw spills only. Default: raw') + parser.add_argument('-o', '--output', default='-', + help='Where to output the information to, - for stdout. Defaults to stdout') + parser.add_argument('file', help='The file to process, - for stdin.') + + args = parser.parse_args(argv) + + with argfile(sys.stdin, args.file, 'r') as infile, argfile(sys.stdout, args.output, 'w') as outfile: + main(infile, outfile, SpillStat[args.which.upper()]) + + +if __name__ == '__main__': + raw_main(sys.argv[1:]) From 088ded767b41a2bdea2bb07bd66ac198ffa8c1f0 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 10 Aug 2021 22:04:08 -0700 Subject: [PATCH 43/45] Fix weighted spill sum regex --- util/misc/raw-spill-counts.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/util/misc/raw-spill-counts.py b/util/misc/raw-spill-counts.py index 48426b46..613ac49d 100755 --- a/util/misc/raw-spill-counts.py +++ b/util/misc/raw-spill-counts.py @@ -25,7 +25,7 @@ def close(self): class SpillStat(enum.Enum): RAW = re.compile(r'GREEDY RA: Number of spilled live ranges: ([0-9]+)') - WEIGHTED = re.compile(r'SC in Function \S+ ([0-9])+') + WEIGHTED = re.compile(r'SC in Function \S+ ([0-9]+)') def sum_stat(infile: IO, 
r: re.Match) -> int: @@ -49,7 +49,8 @@ def raw_main(argv: List[str]) -> None: args = parser.parse_args(argv) - with argfile(sys.stdin, args.file, 'r') as infile, argfile(sys.stdout, args.output, 'w') as outfile: + with argfile(sys.stdin, args.file, 'r') as infile, \ + argfile(sys.stdout, args.output, 'w') as outfile: main(infile, outfile, SpillStat[args.which.upper()]) From 68e1d2e0b4d45f2fda9c62a84068be557ca15b61 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Thu, 12 Aug 2021 18:11:53 -0700 Subject: [PATCH 44/45] Augment raw-spill-counts.py with hot-only filter --- util/misc/raw-spill-counts.py | 61 +++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/util/misc/raw-spill-counts.py b/util/misc/raw-spill-counts.py index 613ac49d..61829608 100755 --- a/util/misc/raw-spill-counts.py +++ b/util/misc/raw-spill-counts.py @@ -3,39 +3,42 @@ import enum import re import sys -from typing import IO, List +from typing import Callable, IO, List from pathlib import Path +from contextlib import contextmanager -class argfile: - def __init__(self, default: IO, filename: Path, mode: str = 'r'): - self.__file = default if filename == '-' else open(filename, mode) - self.__should_close = filename != '-' +@contextmanager +def argfile(filename: str, mode: str): + if filename == '-': + yield sys.stdin if mode == 'r' else sys.stdout + else: + with open(filename, mode) as f: + yield f - def __enter__(self) -> IO: - return self.__file - def __exit__(self, exc_type, exc_value, exc_traceback): - self.close() - - def close(self): - if self.__should_close: - self.__file.close() +class SpillStat(enum.Enum): + RAW = re.compile(r'Function: (?P<name>\S*?)\nGREEDY RA: Number of spilled live ranges: (?P<value>\d+)') + WEIGHTED = re.compile(r'SC in Function (?P<name>\S*?) 
(?P<value>-?\d+)') -class SpillStat(enum.Enum): - RAW = re.compile(r'GREEDY RA: Number of spilled live ranges: ([0-9]+)') - WEIGHTED = re.compile(r'SC in Function \S+ ([0-9]+)') +def _sum_stat(s, r: re.Match, fn_filter: Callable[[str, int], bool]) -> int: + return sum(int(m['value']) for m in r.finditer(s) if fn_filter(m['name'], int(m['value']))) -def sum_stat(infile: IO, r: re.Match) -> int: - return sum( - sum(int(x) for x in r.findall(line)) for line in infile - ) +def sum_stat(infile: IO, r: re.Match, *, fn_filter: Callable[[str, int], bool] = lambda k, v: True) -> int: + try: + pos = infile.tell() + return _sum_stat(infile.read(), r, fn_filter) + except MemoryError: + infile.seek(pos) + return sum( + _sum_stat(line, r, fn_filter) for line in infile + ) -def main(infile: IO, outfile: IO, which: SpillStat = SpillStat.RAW): - spill_count = sum_stat(infile, which.value) +def main(infile: IO, outfile: IO, which: SpillStat = SpillStat.RAW, *, fn_filter: Callable[[str, int], bool] = lambda k, v: True): + spill_count = sum_stat(infile, which.value, fn_filter=fn_filter) print(spill_count, file=outfile) @@ -43,15 +46,23 @@ def raw_main(argv: List[str]) -> None: parser = argparse.ArgumentParser(description='Extract spill counts') parser.add_argument('--which', default='raw', choices=('weighted', 'raw'), help='Whether to extract weighted or raw spills only. Default: raw') + parser.add_argument('--hot-only', help='A file with a space-separated list of functions to consider in the count') parser.add_argument('-o', '--output', default='-', help='Where to output the information to, - for stdout. 
Defaults to stdout') parser.add_argument('file', help='The file to process, - for stdin.') args = parser.parse_args(argv) - with argfile(sys.stdin, args.file, 'r') as infile, \ - argfile(sys.stdout, args.output, 'w') as outfile: - main(infile, outfile, SpillStat[args.which.upper()]) + if args.hot_only: + content = Path(args.hot_only).read_text() + hot_fns = set(content.split()) + def fn_filter(k, v): return k in hot_fns + else: + def fn_filter(k, v): return True + + with argfile(args.file, 'r') as infile, \ + argfile(args.output, 'w') as outfile: + main(infile, outfile, SpillStat[args.which.upper()], fn_filter=fn_filter) if __name__ == '__main__': From cd53602db318635fd62654e5b7c4ee7219a8f766 Mon Sep 17 00:00:00 2001 From: Justin Bassett Date: Tue, 24 Aug 2021 11:38:35 -0700 Subject: [PATCH 45/45] Encourage running load_logs with python -i Without it, there was no interpreter history. With it, we get Python's interactive interpreter history. --- util/misc/load_logs.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) mode change 100755 => 100644 util/misc/load_logs.py diff --git a/util/misc/load_logs.py b/util/misc/load_logs.py old mode 100755 new mode 100644 index bf4420d1..81b56c22 --- a/util/misc/load_logs.py +++ b/util/misc/load_logs.py @@ -1,19 +1,10 @@ -#!/usr/bin/env python3 - import argparse -import sys import analyze -__INTERACTIVE = bool(getattr(sys, 'ps1', sys.flags.interactive)) - if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('logs', nargs='+', help='The logs to analyze') args = analyze.parse_args(parser, 'logs') logs = args.logs - if __INTERACTIVE: - print('Parsed logs into variable `logs`') - else: - import code - code.interact(banner='Parsed logs into variable `logs`', exitmsg='', local={'logs': logs}) + print('Parsed logs into variable `logs`')