-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcreate_sentence.py
executable file
·140 lines (112 loc) · 5.25 KB
/
create_sentence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ******************************************************************************
# Argon Design Ltd. Project P9000 Argon
# (c) Copyright 2017 Argon Design Ltd. All rights reserved.
#
# Module : sjb_word_model
# Author : Steve Barlow
# $Id: create_sentence.py 18332 2017-06-18 16:18:38Z sjb $
# ******************************************************************************
"""Complete a sentence in the style of Theresa May's spoken contributions in parliament.
Use the function create_sentence() to create a sentence from given starting words.
Example:
>>> from create_sentence import create_sentence
>>> print(create_sentence('Brexit means', 0))
Brexit means i am very pleased to say that my right honourable friend the current
home secretary is taking that same passion to ensuring that the voices of the
victims of crime are heard and is taking that forward. i recognise, obviously,
the interest and the attention that the right honourable gentleman has given to
these issues, of course, he is a former health minister, and i would be happy
to meet him and others, as he suggests.
"""
from __future__ import print_function
from keras.models import load_model
import json
import pickle
import gensim as gs
import numpy as np
import os
import sys
import time
# Directory holding this script (kept with a trailing '/'); the model files are loaded from here
myDir = os.path.dirname(os.path.realpath(__file__)) + '/'

# Word2Vec embedding used to turn words into vectors
vmodel = gs.models.Word2Vec.load(os.path.join(myDir, 'vmodel'))

# Pickled pair of lookup tables. codedWord is indexed by a predicted word code to get the word;
# wordCoding is presumably the reverse mapping (not used in this file - confirm against training code)
with open(os.path.join(myDir, 'codetables.pickle'), 'rb') as f:
    wordCoding, codedWord = pickle.load(f)

wordvec_dim = len(vmodel['.'])  # Length of word vector - nominally 300
sd_len = 12                     # Number of words in model input pattern

# N.B. Flask has to run later calls to create_sentence() in the same thread as this
# initialisation, otherwise TF will fail with 'ValueError: Tensor Tensor("Softmax:0", ...'
# That seems to require that Flask is run with debug=False
# The initialising thread is remembered here so that create_sentence() can check for it
import threading
init_thread = threading.current_thread()

# Trained Keras model that predicts the next word code
model = load_model(os.path.join(myDir, 'trained_model.h5'))
# Support functions
def prepare_text(text):
    # Helper function: lower-case the input text and split it into the list of tokens used
    # with the model. Returns a list of strings (empty list for empty/blank input).
    #
    # Each of these marks is surrounded with spaces so that split() yields it as a token
    # of its own. The last two are the left and right double quotes '“' and '”'.
    standalone_marks = ('.', '!', '?', ',', u'\u201c', u'\u201d')

    lowered = text.lower()
    for mark in standalone_marks:
        lowered = lowered.replace(mark, ' ' + mark + ' ')
    return lowered.split()
def sample(a, temperature=1.0):
    # Helper function: choose an index from the probability array 'a'.
    #
    # temperature == 0.0 -> deterministic, the index of the largest entry is returned.
    # Otherwise the probabilities are reshaped as p ** (1 / temperature) (computed through
    # log/exp), renormalised, and one index is drawn at random from the result.
    if temperature != 0.0:
        # Throwaway draw kept from the original ("make sure RNG initialised consistently");
        # it must stay so that the sequence of draws after a given seed is unchanged
        np.random.random()

        probs = np.clip(a, 1E-20, 1.0)  # No zeros allowed, log() is taken next
        probs = np.exp(np.log(probs) / temperature)
        probs = probs / np.sum(probs)
        # Scale very slightly below 1 in total to avoid multinomial's
        # 'sum(pvals[:-1]) > 1.0' error
        probs = probs * 0.999999

        # A single one-hot draw; the position of the 1 is the sampled index
        one_hot = np.random.multinomial(1, probs, 1)
        return np.argmax(one_hot)

    return np.argmax(a)
# ---------------------------------------------------------------------------------------
# Core function
def create_sentence(input_text, seed=None, diversity=0.0):
    """Create a sentence from given starting words.

    'input_text' is a string with the starting words. This can be empty, a single word or several
    words starting a sentence. Upper case and punctuation are allowed.

    'seed' is passed to np.random.seed() to initialise the random number generator. Set this to a
    fixed value for repeatable behaviour or None for random operation.

    'diversity' is the sampling temperature handed to sample(). 0.0 (the default) always picks the
    most probable next word; any other value draws the next word at random from the predicted
    distribution reshaped by that temperature.

    The return value is a string made of 'input_text' exactly as given, followed by the generated
    words. Generation stops at the first '.', '!' or '?' produced after more than 21 generated
    tokens, or after 100 generated tokens, whichever comes first. According to the original
    author, the generated part is all lower case with the only punctuation used being
    ',', '.', '?', '!', '“' and '”'.

    Raises AssertionError if called from a thread other than the one that loaded the model.
    """
    # The model only works on the thread that loaded it (see the note by load_model())
    assert threading.current_thread() == init_thread, \
        'create_sentence() must be called from the thread that loaded the model'

    np.random.seed(seed)

    unknown_vec = vmodel['.']

    def word_vector(w):
        # Words missing from the embedding are represented by the '.' vector. This is used for
        # the seed words and for generated words alike, so neither can raise KeyError.
        return vmodel[w] if w in vmodel else unknown_vec

    # Model input is always sd_len tokens: left-pad with '.' and keep the last sd_len
    words = prepare_text(input_text)
    context_words = (['.'] * sd_len + words)[-sd_len:]
    vecsentence = [word_vector(w) for w in context_words]

    sentence_enders = ('.', '!', '?')
    no_space_before = ('.', '!', '?', ',', u'\u201d')

    output = input_text
    for i in range(100):
        preds = model.predict(np.reshape(vecsentence, (1, -1)), verbose=0)[0]
        word = codedWord[sample(preds, diversity)]

        # Spacing: no space at the very start, none straight after an opening quote, and none
        # before closing punctuation. Only the last *character* of the text so far matters.
        prev_char = output[-1] if output != '' else None
        if prev_char not in (None, u'\u201c') and word not in no_space_before:
            output += ' '
        output += word

        # Slide the input window along by one word
        vecsentence = vecsentence[1:] + [word_vector(word)]

        if i > 20 and word in sentence_enders:
            break

    return output
# ---------------------------------------------------------------------------------------
# Main code (for testing)
if __name__ == '__main__':
    cli_inputs = sys.argv[1:]
    if cli_inputs:
        # Treat every command line argument as a sentence start to complete;
        # each completed sentence is followed by a blank line
        for input_words in cli_inputs:
            print(create_sentence(input_words, seed=0, diversity=0.0))
            print()
    else:
        # No arguments: complete the standard examples, separated by a blank line
        standard_examples = ('The right honourable gentleman', 'Brexit means')
        for position, input_words in enumerate(standard_examples):
            if position > 0:
                print()
            print(create_sentence(input_words, seed=0, diversity=0.0))