forked from lingyongyan/Neural-Machine-Translation
-
Notifications
You must be signed in to change notification settings - Fork 10
/
helpers.py
65 lines (49 loc) · 1.79 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import math
import os
import re
import sys
import time
import unicodedata

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
def show_plot(points):
    """Draw ``points`` as a line chart with major y-axis ticks every 0.2.

    Args:
        points: Values to plot, in order; passed straight to ``Axes.plot``.

    The figure is only created here; it is neither shown nor saved.
    """
    # plt.subplots() already creates the figure. A separate plt.figure() call
    # beforehand would leave an extra, empty figure behind on every call.
    _, ax = plt.subplots()
    # Put major ticks on the y axis at regular intervals of 0.2.
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)
def as_minutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Args:
        s: Duration in seconds (int or float).

    Returns:
        A string such as ``'2m 5s'``. Fractional seconds are truncated by the
        ``%d`` conversion, and minutes are not rolled over into hours.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)
def time_since(since, percent):
    """Report elapsed time plus a linear estimate of the time remaining.

    Args:
        since: Start timestamp, on the same clock as ``time.time()``.
        percent: Progress so far, used as a fraction: the total duration is
            estimated as ``elapsed / percent``.

    Returns:
        A string of the form ``'<elapsed> (- <remaining>)'`` where both parts
        are formatted by ``as_minutes``. When ``percent`` is zero or negative
        no estimate can be made, so the remaining part is ``'?'``.
    """
    elapsed = time.time() - since
    if percent <= 0:
        # Avoid ZeroDivisionError (and meaningless negative estimates) when
        # no progress has been recorded yet.
        return '%s (- %s)' % (as_minutes(elapsed), '?')
    estimated_total = elapsed / percent
    remaining = estimated_total - elapsed
    return '%s (- %s)' % (as_minutes(elapsed), as_minutes(remaining))
def normalize_string(s):
    """Lowercase ``s``, strip accents, and keep only letters and ``. ! ?``.

    Steps, in order:
      1. Lowercase and strip surrounding whitespace.
      2. Remove combining marks via ``unicode_to_ascii``.
      3. Insert a space before each ``.``, ``!`` or ``?``.
      4. Collapse every run of other non-letter characters into one space.

    Note: stripping happens before the substitutions, so the result can still
    begin or end with a space (e.g. when the input starts with a character
    that step 4 replaces).
    """
    text = s.lower().strip()
    text = unicode_to_ascii(text)
    spaced = re.sub(r"([.!?])", r" \1", text)
    return re.sub(r"[^a-zA-Z.!?]+", r" ", spaced)
def unicode_to_ascii(s):
    """Strip combining marks (accents) from a unicode string.

    The string is decomposed with NFD normalization and every character in
    the Unicode category ``Mn`` (nonspacing mark) is dropped. Characters that
    have no such decomposition are returned unchanged, so the result is not
    guaranteed to be pure ASCII.

    Based on http://stackoverflow.com/a/518232/2809427
    """
    decomposed = unicodedata.normalize('NFD', s)
    return ''.join(
        ch for ch in decomposed if unicodedata.category(ch) != 'Mn'
    )
def validate_language(l):
    """Exit the program unless the data set for language ``l`` is present.

    Looks for ``./data/<l>.txt`` relative to the current working directory
    and prints the resolved absolute path. If the file is missing, prints a
    download hint and terminates with exit status 1.

    Args:
        l: Language identifier used as the data file's base name.

    Raises:
        SystemExit: With code 1 when the data file does not exist.
    """
    data_path = os.path.abspath('./data/{}.txt'.format(l))
    print(data_path)
    if os.path.exists(data_path):
        return
    url = 'http://www.manythings.org/anki/'
    print("{}.txt does not exist in the data directory. Please go to '{}' and download the data set.".format(l, url))
    # sys.exit is used rather than the site-provided exit(), which is meant
    # for interactive sessions and is not available in every runtime.
    sys.exit(1)
def validate_language_params(l):
    """Exit the program unless all saved model params for ``l`` are present.

    Checks the current working directory's ``data`` folder for
    ``attention_params_<l>``, ``decoder_params_<l>`` and
    ``encoder_params_<l>``. If any is missing, prints a message and
    terminates with exit status 1.

    Args:
        l: Language identifier used as the suffix of the parameter files.

    Raises:
        SystemExit: With code 1 when any parameter file does not exist.
    """
    param_templates = (
        './data/attention_params_{}',
        './data/decoder_params_{}',
        './data/encoder_params_{}',
    )
    is_missing = any(
        not os.path.exists(template.format(l)) for template in param_templates
    )
    if is_missing:
        print("Model params for language '{}' do not exist in the data directory. Please train a new model for this language.".format(l))
        # sys.exit is used rather than the site-provided exit(), which is
        # meant for interactive sessions and is not available everywhere.
        sys.exit(1)