-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdecryptor.py
300 lines (260 loc) · 11 KB
/
decryptor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
from gensim.models import KeyedVectors
from itertools import combinations, permutations
import pprint
import random
import argparse
import os
# Command-line interface.
# --german / --example / --beispiel select what the program does and exclude
# each other.  --fast and --model only tune which vectors are loaded, so they
# are registered on the parser itself and can be combined with any mode
# (e.g. "--german --model my_vectors.txt").
parser = argparse.ArgumentParser(description="Computer player for the game 'Decrypto'")
group = parser.add_mutually_exclusive_group()
group.add_argument("--german", "-g", action="store_true", help="use German version (default: English)")
group.add_argument("--example", "-e", action="store_true", help="use example Data (default: play)")
group.add_argument("--beispiel", "-b", action="store_true", help="use german example Data (default: play)")
parser.add_argument("--fast", "-f", action="store_true",
                    help="use smaller vocabulary for smaller memory or faster loading time")
parser.add_argument("--model", "-m", help="path to model, if not default")
args = parser.parse_args()

# shared pretty printer used to dump the detailed guessing results
pp = pprint.PrettyPrinter()
def nice_input():
    """Print the ``>>> `` prompt without a newline and return the line the user enters."""
    prompt = ">>> "
    print(prompt, end="")
    reply = input()
    return reply
def guess(digits, words):
    """Predict which digit each clue word belongs to.

    Every word is scored against every digit by the average cosine
    similarity (``model.similarity``) between the word and the clues already
    stored for that digit; a digit without clues scores 0.  If the
    individually best digits are not all different, every assignment of
    distinct digits to the words is tried and the one with the highest total
    score is used instead.

    Args:
        digits: non-empty dict mapping each digit to the list of clue words
            known for it (lists may be empty).
        words: the clue words of the current round, in order.

    Returns:
        One dict per word, in the order of ``words``, with the keys
        ``"word"``, ``"scores"`` (digit -> score), ``"ranking"`` (digits from
        best to worst score), ``"prediction"`` (the chosen digit) and
        ``"choice"`` (index of the prediction inside ``"ranking"``).
    """
    results = []
    for word in words:
        scores = {}
        for dig, known in digits.items():
            if known:
                # average cosine similarity to the clues already seen for this digit
                similarities = [model.similarity(word, w) for w in known]
                scores[dig] = sum(similarities) / len(similarities)
            else:  # handle empty classes
                scores[dig] = 0
        # stable sort: digits with equal scores keep the order of `digits`
        ranking = sorted(scores, key=scores.get, reverse=True)
        results.append({
            "word": word,
            "scores": scores,
            "ranking": ranking,
            "prediction": ranking[0],
            "choice": 0,  # index into "ranking" of the prediction; may change below
        })

    # A code never repeats a digit, so the predictions must be pairwise different.
    if len({r["prediction"] for r in results}) != len(results):
        print("Info: Not the best solution for every word possible.")
        # Brute force is fine here: 4 digits and 3 words give only 24 assignments.
        # The candidates are derived from the arguments, so the function does not
        # depend on module-level state that may not exist yet.
        candidates = [perm
                      for comb in combinations(digits, len(results))
                      for perm in permutations(comb)]
        if candidates:
            totals = [sum(r["scores"][dig] for r, dig in zip(results, perm))
                      for perm in candidates]
            best_total = max(totals)
            # Ties (e.g. the all-zero scores of the first round) are broken at
            # random; taking the true maximum also works when all scores are negative.
            best_perm = random.choice(
                [perm for perm, total in zip(candidates, totals) if total == best_total])
            for result, best_dig in zip(results, best_perm):
                result["prediction"] = best_dig
                result["choice"] = result["ranking"].index(best_dig)
    return results
# Pretrained vectors, downloaded from https://wikipedia2vec.github.io/wikipedia2vec/pretrained/
ger_path = os.path.join("models", "dewiki_20180420_300d.txt")
eng_path = os.path.join("models", "enwiki_20180420_300d.txt")
print("Loading VSM...")
# --beispiel runs the German example words, so it needs the German vectors
# just like --german does.
if args.german or args.beispiel:
    path = ger_path
else:
    path = eng_path
# an explicitly given model always wins over the language default
if args.model:
    path = args.model
# number of vectors read from the file; --fast trades vocabulary for load time/memory
lim = 50000 if args.fast else 500000
model = KeyedVectors.load_word2vec_format(path, binary=False, limit=lim)
print("Done.")
# initialize data structures
# (done before the example modes because guess() may need all_possibilities)
default_digits = {1: [], 2: [], 3: [], 4: []}
# fresh lists per digit: dict(default_digits) would share the lists with the template
digits = {digit: [] for digit in default_digits}
# every ordered selection of three different digits out of 1-4 (24 codes)
all_possibilities = [list(perm) for comb in combinations([1, 2, 3, 4], 3) for perm in permutations(comb)]

if args.example:
    # Example data for debugging
    # solution: 1: "china", 2: "bear", 3: "table", 4: "car"
    digits = {1: ["bamboo", "asia", "country"],
              2: ["brown", "grizzly", "animal"],
              3: ["chair", "coffee", "furniture"],
              4: ["fast", "road", "vehicle"]}
    test = ["rice", "claw", "stool"]
    g = guess(digits, test)
    # `pprint` is the module here; printing goes through the PrettyPrinter instance
    pp.pprint(g)
    quit()
elif args.beispiel:
    # German example data for debugging
    # solution: 1: "china", 2: "bär", 3: "tisch", 4: "auto"
    digits = {1: ["bambus", "asien", "land"],
              2: ["braun", "teddy", "tier"],
              3: ["stuhl", "kaffee", "möbel"],
              4: ["schnell", "straße", "fahrzeug"]}
    test = ["reis", "fell", "sessel"]
    g = guess(digits, test)
    pp.pprint(g)
    quit()
# ---------------------------------------------------------------------------
# Interactive game loop
# ---------------------------------------------------------------------------
YES_ANSWERS = ("yes", "y", "Y")
DIGIT_ANSWERS = ("1", "2", "3", "4")
INVALID_DIGIT = "That is no valid digit, please enter '1', '2', '3' or '4'!"
MAX_ROUNDS = 8   # a game lasts at most this many rounds
WINS_NEEDED = 2  # correct guesses required to win


def _ask_choice(valid, complaint):
    """Prompt until the user enters one of ``valid``; print ``complaint`` otherwise."""
    while True:
        answer = nice_input()
        if answer in valid:
            return answer
        print(complaint)


def _ask_vocab_word():
    """Prompt until the user enters a word that the loaded model knows."""
    while True:
        word = nice_input()
        if model.has_index_for(word):
            return word
        print(f"'{word}' is not in vocabulary – sorry!")


def _said_yes():
    """Read one answer and report whether it is one of the accepted 'yes' spellings."""
    return nice_input() in YES_ANSWERS


def _show_known_clues(known):
    """Print the clues collected so far, one line per digit."""
    for digit, digit_clues in known.items():
        print(f"{digit}: {', '.join(digit_clues) if digit_clues else '––'}")


while True:
    print("\nWill the computer be guessing (1) or giving clues (2)?")
    selection = _ask_choice(("1", "2"), "Please write '1' for guessing or '2' for giving clues")
    print("")

    # guessing routine
    if selection == "1":
        c = 0  # count of correct guesses
        for round_no in range(1, MAX_ROUNDS + 1):
            print(f"\nRound #{round_no}")
            # show the known clues
            if round_no != 1:
                _show_known_clues(digits)
                print("\n")
            # ask for the three clues
            test = []
            for position, suffix in enumerate(["st", "nd", "rd"], start=1):
                print(f"Please input {position}{suffix} clue:")
                test.append(_ask_vocab_word())
            g = guess(digits, test)
            # print a prediction
            for result in g:
                print(result["prediction"])
            print("\nWas the guess correct? (y/n)")
            if not _said_yes():
                # after the last round the correct digits are no longer needed
                if round_no < MAX_ROUNDS:
                    for word in test:
                        print(f"What is the correct digit for '{word}'?")
                        inp = _ask_choice(DIGIT_ANSWERS, INVALID_DIGIT)
                        digits[int(inp)].append(word)
            else:
                c += 1
                if c == WINS_NEEDED:  # winning condition
                    print("The computer won!")
                    print("Do you want to see the thinking of the computer? (y/n)")
                    if _said_yes():
                        pp.pprint(g)
                    break
                print("The computer has the first right guess! Way to go!")
                for result in g:
                    digits[result["prediction"]].append(result["word"])
        else:
            # all rounds played without reaching the winning condition
            print("The computer lost...")

    # giving clues routine
    if selection == "2":
        c = 0  # amount of correct guesses
        print("Do you want to input words (1), or should the computer choose them (2)?")
        selection2 = _ask_choice(("1", "2"), "That is no valid digit, please enter '1' or '2'!")

        # enter target words
        targets = {}
        if selection2 == "1":
            for i in range(1, 5):
                print(f"What is the word #{i}?")
                targets[i] = _ask_vocab_word()
                # clear the terminal so the guessing player cannot read the word
                os.system('cls' if os.name == 'nt' else 'clear')
        # get target words from nouns list
        if selection2 == "2":
            nouns_path = "german-common-nouns.txt" if args.german else "common-nouns.txt"
            with open(nouns_path) as infile:
                stripped = (line.strip() for line in infile)
                # drop blank lines, duplicates and words the model cannot embed;
                # any of those would break or spoil the thesaurus lookup below
                candidates = list(dict.fromkeys(
                    w for w in stripped if w and model.has_index_for(w)))
            if len(candidates) < 4:
                raise SystemExit(f"'{nouns_path}' must contain at least 4 different words known to the model.")
            for i, word in enumerate(random.sample(candidates, 4), start=1):
                targets[i] = word

        # create mini thesaurus for clues
        for i, t in list(targets.items()):
            target_lower = t.lower()
            # skip neighbours that contain the target word or are contained in it,
            # they would give the target away
            sims = [s for s, _score in model.most_similar([t], topn=100)
                    if target_lower not in s.lower() and s.lower() not in target_lower]
            targets[i] = (t, sims)

        for round_no in range(1, MAX_ROUNDS + 1):
            print(f"\nRound #{round_no}")
            # show the known clues
            if round_no != 1:
                _show_known_clues(digits)
                print("")
            this_perm = random.choice(all_possibilities)  # draw random permutation
            # give clues for each digit
            print("The clues:")
            clues = []
            for digit in this_perm:
                clue = random.choice(targets[digit][1])
                for _target, pool in targets.values():  # remove clue from all lists to prevent repetition
                    if clue in pool:
                        pool.remove(clue)
                clues.append(clue)
                digits[digit].append(clue)
            print(", ".join(clues))
            # ask for guess
            # (named player_guess so the guess() function is not overwritten)
            player_guess = []
            for clue in clues:
                print(f"\nGuess digit for '{clue}'")
                player_guess.append(int(_ask_choice(DIGIT_ANSWERS, INVALID_DIGIT)))
            if player_guess == list(this_perm):
                c += 1
                if c == WINS_NEEDED:
                    print("Congratulations, the player has won!")
                    break
                print("The player has their first correct guess! This is the way.")
            else:
                print("I'm sorry, that's wrong. The correct solution is")
                print(", ".join(str(x) for x in this_perm))
        else:
            # all rounds played without reaching the winning condition
            print("The player lost...")

    print("\nDo you want to play again? (y/n)")
    if not _said_yes():
        print("Good bye!")
        quit()
    # start the next game with brand-new, empty clue lists for every digit
    digits = {digit: [] for digit in default_digits}