forked from iqiyi/FASPell
-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot.py
110 lines (89 loc) · 4.8 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import json
import matplotlib.pyplot as plt
import os
def _top_candidates(error, k=5):
    """Return the ``k`` best-scored items of ``error["candidates"]`` as a dict."""
    ranked = sorted(error["candidates"].items(), key=lambda it: it[1], reverse=True)
    return dict(ranked[:k])


def _report_row(wrong_sent, pos, w, c, p, error, absent):
    """Format one tab-separated report line describing a mistaken character."""
    return '\t'.join([wrong_sent,
                      f'pos={pos}',
                      f'w={w}',
                      f'c={c}',
                      f'p={p}',
                      f'sim={error["similarity"]}',
                      f'absent={absent}']) + '\n'


def plot(json_fname, results_fname, store_plots='', plots_to_latex=''):
    """Plot confidence against similarity for every character the model changed.

    The two inputs are read in lockstep: line *i* of ``results_fname`` is
    paired with element *i* of the JSON list in ``json_fname``.

    Each results line must hold five tab-separated fields; the 2nd, 3rd and
    4th are used as the wrong, correct and predicted sentences.  Each JSON
    element must have an ``"errors"`` list whose items carry the keys
    ``"error_position"``, ``"corrected_to"``, ``"candidates"`` (a mapping
    that is ranked by its values), ``"confidence"`` and ``"similarity"``.

    Every position where the prediction differs from the wrong sentence is
    put into one of three series:

    * truly detected and truly corrected   (plotted as ``'ro'``)
    * truly detected but falsely corrected (plotted as ``'bo'``)
    * falsely detected                     (plotted as ``'x'``)

    Side effects:
        * writes ``{name}_falsely_detected.txt`` and
          ``{name}_falsely_corrected.txt``, where ``name`` is ``json_fname``
          without its last dot-separated component;
        * prints two summary lines to stdout;
        * when ``store_plots`` is non-empty, saves ``{name}.png`` (full
          range) and ``{name}2.png`` (zoomed to x in [0.95, 1],
          y in [0.0, 0.6]) under that directory;
        * shows the figure with ``plt.show()``;
        * when ``plots_to_latex`` is non-empty, calls ``produce_latex`` to
          write ``{name}_latex.txt`` under that directory.

    Args:
        json_fname: path of the JSON file with per-sentence error details.
        results_fname: path of the tab-separated results file.
        store_plots: directory for the PNG files; empty to skip saving.
        plots_to_latex: directory for the LaTeX point list; empty to skip.

    Raises:
        AssertionError: if a truly detected error's ``"corrected_to"`` does
            not equal the predicted character at that position.
        KeyError: if a changed position has no entry in ``"errors"``.
        ValueError: if a results line does not have exactly five fields.
    """
    name = '.'.join(json_fname.split('.')[:-1])

    with open(json_fname, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    # Each series is [confidences, similarities] for the scatter plot.
    truly_corrected = [[], []]
    falsely_corrected = [[], []]
    falsely_detected = [[], []]
    # [0]: falsely corrected, [1]: falsely detected -- how many of them did
    # not have the correct character among the top candidates.
    absent_counts = [0, 0]

    # Context managers guarantee the reports are flushed and every handle is
    # released even if a malformed line raises half-way through.
    with open(results_fname, 'r', encoding='utf-8') as fi, \
            open(f'{name}_falsely_detected.txt', 'w', encoding='utf-8') as w3, \
            open(f'{name}_falsely_corrected.txt', 'w', encoding='utf-8') as w4:
        for line, entry in zip(fi, data):
            _, wrong_sent, correct_sent, predict_sent, _ = line.strip().split('\t')
            pos_to_error = {e["error_position"]: e for e in entry["errors"]}

            for pos, (w, c, p) in enumerate(zip(wrong_sent, correct_sent, predict_sent)):
                if w == p:
                    # The model left this character alone: nothing to plot.
                    continue

                e = pos_to_error[pos]
                if w != c:
                    # A real error that the model did change.
                    assert e["corrected_to"] == p
                    if c == p:
                        truly_corrected[0].append(e["confidence"])
                        truly_corrected[1].append(e["similarity"])
                        continue
                    series, kind, report = falsely_corrected, 0, w4
                else:
                    # The character was already right but the model changed it.
                    series, kind, report = falsely_detected, 1, w3

                absent = 'no'
                if c not in _top_candidates(e):
                    absent_counts[kind] += 1
                    absent = 'yes'
                series[0].append(e["confidence"])
                series[1].append(e["similarity"])
                report.write(_report_row(wrong_sent, pos, w, c, p, e, absent))

    # print statistics
    print(f'In {len(falsely_corrected[0])} falsely corrected characters,'
          f' {absent_counts[0]} are because of absent correct candidates.')
    print(f'In {len(falsely_detected[0])} falsely detected characters,'
          f' {absent_counts[1]} are because of absent correct candidates.')

    plt.plot(truly_corrected[0], truly_corrected[1], 'ro',
             falsely_corrected[0], falsely_corrected[1], 'bo',
             falsely_detected[0], falsely_detected[1], 'x')
    full_range = [0.0, 1.0, 0.0, 1.0]
    plt.axis(full_range)

    if store_plots:
        # Save BEFORE show(): a blocking show() unregisters the figure from
        # pyplot, so a later savefig() would write a new, empty figure.
        axes = plt.gca()
        plt.savefig(os.path.join(store_plots, f'{name}.png'))
        axes.set_xlim([0.95, 1])
        axes.set_ylim([0.0, 0.6])
        plt.savefig(os.path.join(store_plots, f'{name}2.png'))
        # Put the full range back so the window shows the un-zoomed plot.
        plt.axis(full_range)

    plt.show()

    if plots_to_latex:
        produce_latex(truly_corrected,
                      falsely_corrected,
                      falsely_detected, os.path.join(plots_to_latex, f'{name}_latex.txt'))
def produce_latex(truely_detected_and_truely_corrected, truely_detected_and_falsely_corrected, falsely_detected, path):
    """Write the three point series to ``path`` as labelled coordinates.

    Every series is a pair ``[xs, ys]``.  Each point is written as
    ``(x,y)[label]`` with no separator between points, where the label is
    ``a`` for truly detected and truly corrected, ``b`` for truly detected
    but falsely corrected, and ``c`` for falsely detected.  Points keep
    their input order and the series are written in a, b, c order.

    Args:
        truely_detected_and_truely_corrected: ``[xs, ys]`` labelled ``a``.
        truely_detected_and_falsely_corrected: ``[xs, ys]`` labelled ``b``.
        falsely_detected: ``[xs, ys]`` labelled ``c``.
        path: output file; created or truncated.
    """
    labelled_series = (
        (truely_detected_and_truely_corrected, 'a'),
        (truely_detected_and_falsely_corrected, 'b'),
        (falsely_detected, 'c'),
    )
    # The context manager closes the file even when a write fails part-way.
    with open(path, 'w') as f:
        for series, label in labelled_series:
            for x, y in zip(series[0], series[1]):
                f.write(f'({x},{y})[{label}]')