-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmessageparse.py
159 lines (122 loc) · 4.72 KB
/
messageparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from fuzzywuzzy import fuzz
import json
from constants import MARKS, ATTENDANCE
'''
Hard-coded NLP library for MIT-HODOR
'''
def match(key, reference, threshold=67):
    """Report whether ``key`` fuzzily matches any entry of ``reference``.

    The key is lower-cased and scored against each entry with
    ``fuzz.ratio``; entries are compared exactly as given. The search
    stops at the first entry scoring strictly above ``threshold``.

    Args:
        key: Word to test.
        reference: Iterable of candidate strings.
        threshold: Score that has to be exceeded for a match.

    Returns:
        True if some entry scores above ``threshold``, otherwise False.
    """
    return any(
        fuzz.ratio(key.lower(), candidate) > threshold
        for candidate in reference
    )
def attendance_match(key):
    """Return True when ``key`` fuzzily matches an entry of ``ATTENDANCE``."""
    return match(key, ATTENDANCE)
def marks_match(key):
    """Return True when ``key`` fuzzily matches an entry of ``MARKS``."""
    return match(key, MARKS)
# Returns the action(s) to be carried out for the given message
def intent(message="", scraped_data=None):
    """Work out which actions and subjects a message asks about.

    The message is split on whitespace and every word is classified in
    turn: the first word matching the attendance keywords sets the
    attendance flag, the first word matching the marks keywords sets the
    marks flag, and any other word is tried as a subject name.

    Args:
        message: Text received from the user.
        scraped_data: Scraped data passed on to ``extract_subjects``.
            ``None`` or an empty mapping means no subjects are known.

    Returns:
        A dict with the keys ``'marks'`` (bool), ``'attendance'`` (bool)
        and ``'subject'`` (list of matched subject names, each listed
        once, in order of first appearance in the message).
    """
    # Nothing scraped means no subjects; skipping the lookup keeps the
    # default call from failing on the absent "Subjects" entry.
    sub_list = extract_subjects(scraped_data) if scraped_data else []

    attendance = False
    marks = False
    subject = []
    for word in message.split():
        # Once a flag is set, later keyword-like words fall through and
        # are tried as subjects, as before.
        if not attendance and attendance_match(word):
            attendance = True
            continue
        if not marks and marks_match(word):
            marks = True
            continue
        sub = subject_match(word, sub_list)
        # Several words of one subject name can each match it; keep a
        # single entry so the subject is not reported repeatedly.
        if sub is not None and sub not in subject:
            subject.append(sub)

    return {'marks': marks, 'attendance': attendance, 'subject': subject}
# Returns an abbreviated and an easy to compare subject-list
def extract_subjects_easy(subjects):
    """Build simplified names and abbreviations for a list of subjects.

    Stand-alone ``-`` separators and the roman numerals I to IV are
    dropped as whole tokens, so words that merely start with those
    letters (for example "IMAGE") are left intact. The abbreviation is
    formed from the first letter of every remaining word. The simplified
    name additionally has the substrings ``'ENGINEERING '`` and
    ``' OF '`` replaced by a single space and is stripped of surrounding
    whitespace.

    Args:
        subjects: Iterable of subject names. Matching is case-sensitive
            against upper-case filler words.

    Returns:
        A tuple ``(names, abbreviations)`` of two lists, both in the same
        order as ``subjects``.
    """
    # Tokens that only number or punctuate a course title.
    filler_tokens = {'-', 'I', 'II', 'III', 'IV'}
    # Substrings removed from the comparison name only; they still
    # contribute a letter to the abbreviation.
    filler_words = ('ENGINEERING ', ' OF ')

    names = []
    subject_abbr = []
    for subject in subjects:
        tokens = [tok for tok in subject.split() if tok not in filler_tokens]
        subject_abbr.append(''.join(tok[0] for tok in tokens))

        name = ' '.join(tokens)
        for word in filler_words:
            name = name.replace(word, ' ')
        names.append(name.strip())
    return names, subject_abbr
# Extracts subject names from the data scraped
def extract_subjects(scraped_data):
    """Return the subject names listed under ``"Subjects"``.

    Args:
        scraped_data: Mapping holding the scraped data, or ``None``.

    Returns:
        A new list with the items obtained by iterating
        ``scraped_data["Subjects"]``. The list is empty when
        ``scraped_data`` is ``None`` or empty, or when it has no
        ``"Subjects"`` entry.
    """
    if not scraped_data:
        return []
    # A missing or empty entry yields no subjects instead of a KeyError.
    return list(scraped_data.get("Subjects") or ())
# the real OG - checks whether a word matches a subject
def subject_match(key, original_subjects):
    """Find the subject that a single word refers to, if any.

    Dots are removed from the word and it is lower-cased. It is first
    scored against the lower-cased abbreviations; only a score of 100 is
    accepted there. Otherwise it is scored against the lower-cased
    simplified names, where the best score has to exceed 50. Ties go to
    the earliest subject.

    Args:
        key: Word taken from the user's message.
        original_subjects: Sequence of subject names.

    Returns:
        The matching entry of ``original_subjects``, or ``None`` when
        nothing scores high enough.
    """
    word = key.replace(".", "").lower()
    names, abbreviations = extract_subjects_easy(original_subjects)

    # Abbreviations are short, so only a perfect score counts.
    abbr_scores = [fuzz.ratio(word, abbr.lower()) for abbr in abbreviations]
    best = max(abbr_scores, default=0)
    if best == 100:
        return original_subjects[abbr_scores.index(best)]

    # Fall back to the names with a looser threshold.
    name_scores = [fuzz.ratio(word, name.lower()) for name in names]
    best = max(name_scores, default=0)
    if best > 50:
        return original_subjects[name_scores.index(best)]
    return None
def _format_marks(subject_marks):
    """Render the grade and the internal scores of one subject."""
    lines = []
    if 'Grade' in subject_marks:
        lines.append("Grade: {}\n".format(subject_marks['Grade']))
    for test_name, result in (subject_marks.get('Internals') or {}).items():
        # Entries without an obtained score are skipped.
        if 'Obtained' in result:
            lines.append("{}: {}\n".format(test_name, result['Obtained']))
    return "".join(lines)


def _format_attendance(name, record):
    """Render the attendance record of one subject, or '' if unusable."""
    if not record:
        return ""
    try:
        return ("{}:"
                "\nAttended: {}/{}"
                "\nPercentage: {}%").format(name, record['Attended'],
                                            record['Total'],
                                            record['Percentage'])
    except KeyError:
        # Incomplete record: leave only the heading instead of failing.
        return ""


def get_response(message="", scraped_data=None):
    """Build the reply for a user message.

    The main function to be called from this file. The returned string
    can be sent to the user as a message.

    Args:
        message: Text received from the user.
        scraped_data: Mapping with the scraped data; the ``'Subjects'``
            and ``'Attendance'`` entries are read, keyed by subject name.
            ``None`` is treated as an empty mapping.

    Returns:
        - "Hodor?" when neither a subject nor an action was recognised.
        - A request to name the subject when only an action was found.
        - The subject names followed by a prompt for the action when only
          subjects were found.
        - Otherwise one section per subject with the requested marks
          and/or attendance; sections are separated by a line break.
    """
    if scraped_data is None:
        scraped_data = {}
    actual_intent = intent(message=message, scraped_data=scraped_data)
    subjects = actual_intent['subject']
    wants_marks = actual_intent['marks']
    wants_attendance = actual_intent['attendance']

    if not subjects:
        if wants_marks or wants_attendance:
            return "Please mention the subject for which you want to see."
        return "Hodor?"

    if not (wants_marks or wants_attendance):
        # Put the prompt on its own line so it does not run into the name.
        return "\n".join(subjects) + "\nWhat do you want me to do?"

    all_marks = scraped_data.get('Subjects') or {}
    all_attendance = scraped_data.get('Attendance') or {}

    sections = []
    for name in subjects:
        parts = [name]
        if wants_marks:
            parts.append("\nMarks -\n")
            parts.append(_format_marks(all_marks.get(name) or {}))
        if wants_attendance:
            parts.append("\nAttendance -\n")
            parts.append(_format_attendance(name,
                                            all_attendance.get(name) or {}))
        sections.append("".join(parts))

    # An attendance section has no trailing line break; add one between
    # sections so the next subject name starts on a fresh line.
    separated = [
        section if section.endswith("\n") else section + "\n"
        for section in sections[:-1]
    ]
    return "".join(separated) + sections[-1]