-
Notifications
You must be signed in to change notification settings - Fork 0
/
angela.py
160 lines (139 loc) · 6.4 KB
/
angela.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from random import randint
from string import punctuation
# Natural Language Toolkit imports
from nltk.tag import pos_tag
from nltk.tokenize import sent_tokenize, word_tokenize
#####################################################################
# Constants
#####################################################################
angela = "Angela: "
fail = "Your mum's face wishes her face's mum was... I mean... Wait..."
broken = "Well done. You broke Angela. I hope you're happy now."
modal_past = [
"could ",
"would "
]
questions = [
"What do you think about Ruth?",
"How was your day?",
"How are you doing?",
"What have you been up to?",
"What do you think about me?",
"Where's Rebecca?",
"When was the last time you saw Dave?",
"What's the best thing about cats?",
"Do you know what Andreja's plotting now?",
"How do you feel about glitter?" ,
"Ponies?",
"What should I do?",
"What do you like to do?"
]
bases_simple = [
"Your face ",
"Your mum "
]
bases_wishes = [
"Your mum wishes she ",
"Your mum wishes her face ",
"Your mum wishes your face "
]
bases = [simple + "is " for simple in bases_simple] + [wish + "was " for wish in bases_wishes]
#####################################################################
# Helper function definitions
#####################################################################
# Returns a random member from the given list
def random_member(from_list):
return from_list[randint(0, len(from_list) - 1)]
# Returns all token from the given list with the given tag
def tokens_by_tag(tagged_tokens, search_tag):
return [token for (token, tag) in tagged_tokens if tag == search_tag]
# Builds a response based on one of the official bases,
# a word in the source list, using and given connectives to bind them
def build_repartees(bases, from_list, connectives):
return [random_member(bases) + connectives + elem + "." for elem in from_list]
# Lowercases all words except proper nouns.
def decapitalise(string, groups):
newString = ""
for word in string:
if word not in groups["nouns_proper"] + groups["nouns_proper_plural"]:
newString += word.lower()
else:
newString += word
return newString
# Try to build a multiple word response
def multiword_responses(tokens, from_groups, to_groups):
helper_responses = []
responses = []
for verb in from_groups:
helper_responses += [tokens[i:] if i < len(tokens) else None for i in range(len(tokens)) if tokens[i] == verb]
# up to noun/punctuation/... or the end
for r in helper_responses:
r = [r[:(i + 1)] if i < len(r) else None for i in range(len(r)) if r[i] in to_groups]
if r and r[0]:
response = " ".join(r[0])
responses.append(response)
return responses
# Extract the part to use as the insult
# TODO Some cases don't work well (eg adverb - however, nouns - fluffy...)
def generate_all_possible_repartees(tokens, groups):
to_groups = groups["punctuation"] + groups["nouns"] + groups["nouns_plural"] + groups["nouns_proper"] + groups["nouns_proper_plural"]
mw_responses_non_3rd = multiword_responses(tokens, groups["verbs"] + groups["verbs_non_3rd"], to_groups)
mw_responses_3rd = multiword_responses(tokens, groups["verbs_3rd"], to_groups)
repartees = []
repartees += build_repartees(bases_simple, groups["verbs_3rd"] + mw_responses_3rd, "")
repartees += build_repartees(bases_wishes, groups["verbs"] + groups["verbs_non_3rd"] + mw_responses_non_3rd, random_member(modal_past))
repartees += build_repartees(bases_wishes, groups["verbs_past"], "")
repartees += build_repartees(bases_wishes, groups["verbs_past_participle"], random_member(modal_past) + "have ")
repartees += build_repartees(bases_wishes, groups["nouns_plural"] + groups["nouns_proper_plural"], "had ")
repartees += build_repartees(bases, groups["adjectives"] + groups["adjectives_comparative"] + groups["nouns_proper"], "")
repartees += build_repartees(bases, groups["adjectives_superlative"] + groups["adverbs_superlative"] + groups["nouns"], "the ")
repartees += build_repartees(bases, groups["adverbs"] + groups["adverbs_comparative"], "doing ")
if repartees:
return repartees
return [angela + fail + "\n" + broken]
#return [angela + random_member(bases) + response + "."]
# Explanation: http://www.monlp.com/2011/11/08/part-of-speech-tags/
def group_by_tag(tagged):
pos_punctuation = []
for punct in punctuation:
pos_punctuation += tokens_by_tag(tagged, punct)
return {
"adjectives" : tokens_by_tag(tagged, "JJ"),
"adjectives_comparative" : tokens_by_tag(tagged, "JJR"),
"adjectives_superlative" : tokens_by_tag(tagged, "JJS"),
"adverbs" : tokens_by_tag(tagged, "RB"),
"adverbs_comparative" : tokens_by_tag(tagged, "RBR"),
"adverbs_superlative" : tokens_by_tag(tagged, "RBS"),
"nouns" : tokens_by_tag(tagged, "NN"),
"nouns_plural" : tokens_by_tag(tagged, "NNS"),
"nouns_proper" : tokens_by_tag(tagged, "NNP"),
"nouns_proper_plural" : tokens_by_tag(tagged, "NNPS"),
"verbs" : tokens_by_tag(tagged, "VB"),
"verbs_past" : tokens_by_tag(tagged, "VBD"),
"verbs_3rd" : tokens_by_tag(tagged, "VBZ"),
"verbs_non_3rd" : tokens_by_tag(tagged, "VBP"),
"verbs_past_participle" : tokens_by_tag(tagged, "VBN"),
"verbs_gerund" : tokens_by_tag(tagged, "VBG"),
"pronouns_personal" : tokens_by_tag(tagged, "PRP"),
"punctuation" : pos_punctuation
}
#####################################################################
# Interactive part of the program
#####################################################################
if __name__=="__main__":
# Start talking to Angela
question = random_member(questions)
print angela + question
response = raw_input("You: ")
# Extract sentence parts (nouns, adjectives, maybe later verbs)
#response = "".join([char for char in response if char not in punctuation])
tokens = [word_tokenize(sentence) for sentence in sent_tokenize(response)]
# Flatten the list of lists
tokens = [elem for sublist in tokens for elem in sublist]
tagged = pos_tag(tokens) # this causes the lag - but I doubt I can fix it
groups = group_by_tag(tagged)
# Select an insult as a response
if ("Ruth" in question):
print angela + "Yeah, I agree."
else:
print angela + decapitalise(random_member(generate_all_possible_repartees(tokens, groups)), groups).capitalize()