add_graph_info_and_order_tool_to_csv.py
#!/usr/bin/env python3
"""
Author : Xinyuan Chen <[email protected]>
Date : 2022-09-16
Purpose: Add graph info and order tool for an all.csv file
"""
import argparse
from io import StringIO
from pathlib import Path
import json
import csv
from typing import TypeVar
from collections.abc import Iterable
from utils_other import get_autorange_count
from config import (
get_method_order,
tool_order,
dataset_name_mapping,
dataset_homepage_mapping,
)

T = TypeVar('T')


def ordered_dedupe(seq: Iterable[T]) -> list[T]:
# https://stackoverflow.com/questions/480214/how-do-i-remove-duplicates-from-a-list-while-preserving-order
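    # e.g. ordered_dedupe([3, 1, 3, 2, 1]) -> [3, 1, 2]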
seen = set()
seen_add = seen.add
return [x for x in seq if not (x in seen or seen_add(x))]


def add_graph_info_and_order_tool_to_csv(
csv_path: Path,
tool_order: list[str] = tool_order,
add_graph_info: bool = True,
expand_dataset_name: bool = False,
add_autorange_iteration_count: bool = False,
) -> list[dict]:
r = csv.DictReader(csv_path.read_text().splitlines())
rows = list(r)

    # expand dataset names
dataset_name_mapping_reversed = {v: k for k, v in dataset_name_mapping.items()}
for row in rows:
if row['dataset'] in dataset_name_mapping_reversed:
row['dataset'] = dataset_name_mapping_reversed[row['dataset']]

    # add graph info
if add_graph_info:
gi = Path(__file__).parent / 'graph_info.json'
gi_d = json.loads(gi.read_text())
for row in rows:
if row['dataset'] in gi_d:
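                # merge the dataset's stats from graph_info.json into the row (dict union, Python 3.9+)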
row |= gi_d[row['dataset']]
if row['dataset'] in dataset_homepage_mapping:
row['dataset_homepage'] = dataset_homepage_mapping[row['dataset']]

    # add autorange iteration count
if add_autorange_iteration_count:
for row in rows:
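            # derive an iteration count from the measured average time (helper from utils_other)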
row['iteration count'] = get_autorange_count(float(row['avg time']))

    # order the records
method_order = get_method_order()
dataset_order = ordered_dedupe([x['dataset'] for x in rows])
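    # sort key: dataset (input order), then method (configured order, unknown methods last),
    # then tool; rows for stub datasets are dropped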
rows_sorted = sorted(
filter(lambda x: not x['dataset'].startswith('stub'), rows),
key=(
lambda row: (
dataset_order.index(row['dataset']),
method_order.index(row['method'])
if row['method'] in method_order
else 999,
tool_order.index(row['tool']),
)
),
)

    # revert to abbreviated dataset names
if not expand_dataset_name:
for row in rows_sorted:
if row['dataset'] in dataset_name_mapping:
row['dataset'] = dataset_name_mapping[row['dataset']]
return rows_sorted


def get_args():
"""Get command-line arguments"""
parser = argparse.ArgumentParser(
description='Add graph info and order tool for an all.csv file',
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
'all_csv_path',
metavar='CSV PATH',
help='Path to the all.csv file generated by merge_bench_csv.py',
type=Path,
)
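    # store_false: graph info is added by default; passing -A/--no-add-graph-info disables it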
parser.add_argument(
'-A',
'--no-add-graph-info',
        help='Do not add graph info to the output CSV',
action='store_false',
)
parser.add_argument(
'-a',
'--abbreviated-dataset-names',
help='Use abbreviated dataset names',
action='store_true',
)
parser.add_argument(
'-c',
'--autorange-iteration-count',
help='Add autorange iteration count',
action='store_true',
)
return parser.parse_args()


def main():
"""Make a jazz noise here"""
args = get_args()
all_csv_path = args.all_csv_path
new_rows = add_graph_info_and_order_tool_to_csv(
all_csv_path,
add_graph_info=args.no_add_graph_info,
expand_dataset_name=not args.abbreviated_dataset_names,
add_autorange_iteration_count=args.autorange_iteration_count,
)
header = new_rows[0].keys()
s = StringIO()
w = csv.DictWriter(s, header)
w.writeheader()
w.writerows(new_rows)
print(s.getvalue().strip())


if __name__ == '__main__':
main()
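

# Example usage (output file name is illustrative):
#   ./add_graph_info_and_order_tool_to_csv.py all.csv > all_annotated.csv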