-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathreport2markdown.py
92 lines (78 loc) · 3.26 KB
/
report2markdown.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import argparse
from collections import Counter, defaultdict, OrderedDict
import json
import numpy as np
import os
import pathlib
from pathlib import Path
import pandas as pd
import typing
from typing import Dict, List
from sklearn.base import TransformerMixin
import ml4logs
from ml4logs.data.hdfs import load_data_as_dict, load_labels
def import_result_files(result_dir):
    """Collect per-method metrics from every ``*.json`` file in a directory.

    Each file must contain a top-level ``"metrics"`` object that maps a method
    name to a mapping of metric values. One record is produced per
    (file, method) pair; the file name without its extension is stored in the
    ``preprocess`` column.

    Args:
        result_dir: Directory scanned (non-recursively) for ``*.json`` files.

    Returns:
        A ``pandas.DataFrame`` whose first columns are ``method`` and
        ``preprocess``, followed by the metric keys found in the files.
        Files are processed in sorted path order.

    Raises:
        KeyError: If a file has no top-level ``"metrics"`` entry.
    """
    metrics = []
    for res_file in sorted(Path(result_dir).glob("*.json")):
        print(res_file)
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(res_file, "r", encoding="utf-8") as f:
            # object_pairs_hook receives the key/value pairs in document
            # order, so metric columns keep the order used in the file.
            data = json.load(f, object_pairs_hook=OrderedDict)
        for method, results in data["metrics"].items():
            rec = OrderedDict([("method", method), ("preprocess", res_file.stem)])
            rec.update(results)
            metrics.append(rec)
    return pd.DataFrame(metrics)
def markdown_table_string(result_df, method2str, pre2str):
    """Render experiment results as a Markdown table with best scores in bold.

    Rows are ordered by descending F1. Method and preprocess identifiers are
    replaced by their display names when a mapping entry exists and are kept
    verbatim otherwise. Scores are formatted with three decimals, and in each
    metric column every cell equal to the column's best displayed value is
    wrapped in ``**...**`` (so ties at three decimals are all highlighted).

    Args:
        result_df: DataFrame with the columns ``method``, ``preprocess``,
            ``precision``, ``recall``, ``f1`` and ``mcc``. It is not modified.
        method2str: Mapping from method identifier to display name.
        pre2str: Mapping from preprocess identifier to display name.

    Returns:
        The table as a Markdown string, without the index column.

    Raises:
        KeyError: If ``result_df`` lacks one of the required columns.
    """
    display_names = {
        "method": "Method",
        "preprocess": "Preprocess",
        "precision": "Precision",
        "recall": "Recall",
        "f1": "F1",
        "mcc": "MCC",
    }
    # Column selection plus rename yields a new frame, so the caller's data
    # is left untouched.
    df = result_df[list(display_names)].rename(columns=display_names)
    df = df.sort_values("F1", ascending=False)

    df["Method"] = df["Method"].map(lambda m: method2str.get(m, m))
    df["Preprocess"] = df["Preprocess"].map(lambda p: pre2str.get(p, p))

    for col in ("Precision", "Recall", "F1", "MCC"):
        df[col] = df[col].map("{:.3f}".format)
        # Compare the rounded values that are actually displayed.
        shown = pd.to_numeric(df[col])
        # Series.max() skips NaN, so one missing score does not prevent the
        # best remaining score from being highlighted.
        best = shown.max()
        mask = (shown == best).to_numpy()
        # Assign through .loc: chained assignment (df[col][mask] = ...) is not
        # guaranteed to write back into df and is a no-op under Copy-on-Write.
        df.loc[mask, col] = df.loc[mask, col].map("**{}**".format)

    # 'index' is the documented to_markdown parameter; 'showindex' is a
    # deprecated pass-through to tabulate.
    return df.to_markdown(index=False)
def dir_path(string):
    """Return ``string`` unchanged if it names an existing directory.

    Used as the ``type=`` converter for the ``result_dir`` command-line
    argument.

    Args:
        string: Candidate directory path.

    Returns:
        The same ``string`` that was passed in.

    Raises:
        NotADirectoryError: If ``string`` is not an existing directory; the
            offending path is the exception's only argument.
    """
    if not os.path.isdir(string):
        raise NotADirectoryError(string)
    return string
if __name__ == '__main__':
    # Command-line entry point: read all result JSON files from the given
    # directory and print them as one Markdown table on stdout.
    parser = argparse.ArgumentParser(description='Produce Markdown table from LAD experiment result JSON files.')
    parser.add_argument('result_dir', type=dir_path, nargs=1,
                        help='result directory, e.g., "results/HDFS1"')
    args = parser.parse_args()

    # nargs=1 makes argparse store the positional as a one-element list.
    df = import_result_files(args.result_dir[0])

    # Display names for method identifiers; identifiers missing here are
    # printed verbatim.
    method2str = {
        "pca": "PCA",
        "decision_tree": "Decision Tree",
        "logistic_regression": "Logistic Regression",
        "one_class_svm": "One Class SVM",
        "lof_sklearn": "Local Outlier Factor (`sklearn`)",
        "linear_svc": "Linear SVC",
        "isolation_forest_sklearn": "Isolation Forest (`sklearn`)",
        "lstm_classifier_m2o": "LSTM M2O",
        "lstm_classifier_m2m": "LSTM M2M",
    }

    # Display names for preprocessing pipelines, keyed by result file stem.
    # Each key appears exactly once (a repeated key in a dict literal is
    # silently overwritten by the later entry).
    pre2str = {
        "ibm_drain-unsupervised-loglizer": "Drain3",
        "ibm_drain-loglizer": "Drain3",
        "fasttext_timedeltas_minmax_blockmax-unsupervised-loglizer": "fastText block-max",
        "fasttext_timedeltas_minmax_blockmax-loglizer": "fastText block-max",
        "fasttext_timedeltas_minmax-seq2label": "fastText",
        "fasttext_timedeltas_minmax-seq2label-m2m": "fastText",
    }

    print(markdown_table_string(df, method2str, pre2str))