-
Notifications
You must be signed in to change notification settings - Fork 3
/
PyBreak.py
105 lines (84 loc) · 3.83 KB
/
PyBreak.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import string, nltk
# Module-level state. These start out empty and are rebound by read_data()
# once the pickled data files have been loaded.

# Lookup tables between question text, question ids and stored answers.
question_to_id_mapping = {}
id_to_question_mapping = {}
ans_id_mappings = {}

# Placeholders for the fitted TF-IDF vectorizer and the precomputed TF-IDF
# matrix it is compared against in compute_similarity().
TfIdfVector = []
tfidf_values = []

# Message returned when no stored question resembles the query.
DEFAULT_ANSWER = "I am sorry! I couldn't find answers for your problem. My knowledge is limited"
def compute_similarity(query):
    """Return the stored questions most similar to ``query``.

    The query is vectorized with the module-level ``TfIdfVector`` and compared
    by cosine similarity against every row of ``tfidf_values``.

    Args:
        query: Text to search for (PyBreak passes the exception message).

    Returns:
        ``DEFAULT_ANSWER`` (a string) when nothing in the corpus has a
        non-zero similarity to the query; otherwise a list of up to ten
        entries of ``sentence_tokens``, best match first.

    Note:
        ``sentence_tokens``, ``TfIdfVector`` and ``tfidf_values`` are module
        globals populated by ``read_data()``, which must run first.
    """
    from sklearn.metrics.pairwise import cosine_similarity

    tfidf_query = TfIdfVector.transform([query])[0]
    scores = cosine_similarity(tfidf_query, tfidf_values).flatten()

    # The query is not part of the corpus, so the *highest* score is the best
    # candidate. The previous check looked at the second-highest value, which
    # reported "no match" when exactly one question matched and raised
    # IndexError on a one-document corpus.
    if scores.size == 0 or scores.max() == 0:
        return DEFAULT_ANSWER

    # Indexes of the ten highest scores, best first.
    top_indexes = scores.argsort()[::-1][:10]
    # Drop zero-score padding: those rows share no terms with the query.
    return [sentence_tokens[idx] for idx in top_indexes if scores[idx] > 0]
def get_answers(query):
    """Collect the stored answers for the questions most similar to ``query``.

    Args:
        query: Text to search for (PyBreak passes the exception message).

    Returns:
        ``DEFAULT_ANSWER`` (a string) when ``compute_similarity`` finds no
        match; otherwise a dict mapping each matched question id to its entry
        in ``ans_id_mappings``. Matched questions without stored answers are
        left out.

    Raises:
        KeyError: If a matched question text is missing from
            ``question_to_id_mapping``.
    """
    global question_to_id_mapping, ans_id_mappings
    # Leading spaces + '\r' overwrite the previous progress line in place.
    print(' ' * 40 + '\rAlmost done! Just arranging content for you...', end='\r')
    questions = compute_similarity(query)
    if questions == DEFAULT_ANSWER:
        return DEFAULT_ANSWER

    # The mapping is keyed by the matched text minus its final character.
    # NOTE(review): presumably a trailing punctuation mark left by sentence
    # tokenization -- confirm against the code that builds the pickles.
    ids = [question_to_id_mapping[question[:-1]] for question in questions]

    results = {}
    for question_id in ids:
        # Explicit membership test instead of a bare ``except: pass`` so that
        # only "no answers stored" is skipped and real errors still surface.
        if question_id in ans_id_mappings:
            results[question_id] = ans_id_mappings[question_id]
    return results
def read_data():
    """Load the pickled question/answer data and TF-IDF model into globals.

    Reads three files relative to the current working directory:

    * ``data/question_data.pickle`` -> ``question_to_id_mapping``,
      ``id_to_question_mapping``
    * ``data/answer_data.pickle`` -> ``ans_id_mappings``, ``sentence_tokens``
    * ``data/model.pickle`` -> ``TfIdfVector``, ``tfidf_values``

    Afterwards the answers of every known question are sorted in place by
    their ``'score'`` value.

    Raises:
        OSError: If one of the data files cannot be opened.
    """
    global sentence_tokens, question_to_id_mapping, id_to_question_mapping, ans_id_mappings
    global TfIdfVector, tfidf_values
    import pickle

    print("Please wait while I am thinking...", end='\r')

    # SECURITY NOTE: pickle.load can execute arbitrary code while
    # deserializing. These files must come from a trusted source only.
    # ``with`` closes each handle; the original left all three files open.
    with open('data/question_data.pickle', 'rb') as handle:
        question_to_id_mapping, id_to_question_mapping = pickle.load(handle)
    with open('data/answer_data.pickle', 'rb') as handle:
        ans_id_mappings, sentence_tokens = pickle.load(handle)
    with open('data/model.pickle', 'rb') as handle:
        TfIdfVector, tfidf_values = pickle.load(handle)

    for question_id in id_to_question_mapping:
        if question_id not in ans_id_mappings:
            continue
        # Replaces a hand-rolled O(n^2) swap loop that compared against a
        # stale element and could leave the list unsorted (e.g. scores
        # [5, 6, 1, 2] ended up as [1, 5, 2, 6]). Ascending order is what
        # that loop produced when it did succeed.
        # NOTE(review): if the highest-scored answer is meant to be shown
        # first, add ``reverse=True`` -- intent is not visible from this file.
        # Assumes each entry is a list of dicts with a 'score' key.
        ans_id_mappings[question_id].sort(key=lambda answer: answer['score'])
def beautify_print(matches):
    """Print every matched question followed by its stored answers.

    Args:
        matches: Mapping whose keys are question ids. Only the keys are used;
            question details come from ``id_to_question_mapping`` and answers
            from ``ans_id_mappings``.

    Question and answer bodies are passed through ``html2text.html2text``
    before printing. Questions with no entry in ``ans_id_mappings`` are
    skipped.
    """
    global id_to_question_mapping
    import html2text

    to_text = html2text.html2text
    heavy_rule = '#' * 80
    light_rule = '-' * 80

    for qid in matches.keys():
        entry = id_to_question_mapping[qid]
        if qid in ans_id_mappings:
            # Question header block.
            print(heavy_rule)
            print('Question title: ' + entry['title'])
            print(heavy_rule)
            print('Author: User #' + entry['author'])
            print('\nDescription: \n' + to_text(entry['body']))
            print(light_rule)
            # Answers, numbered from 1 in stored order.
            for number, reply in enumerate(ans_id_mappings[qid], start=1):
                heading = '[Answer ' + str(number) + '] by User #' + reply['author'] + ':'
                print(heading, end='\n\n')
                print(to_text(reply['body']))
                print(light_rule)
def PyBreak(your_code):
    """Decorator that offers StackOverflow help when ``your_code`` raises.

    The wrapped function is called with the caller's arguments and its return
    value is passed through unchanged. If it raises an ``Exception``:

    1. the traceback is printed to stderr;
    2. the user is asked whether to look for help;
    3. on ``y`` the data files are loaded, the exception message is searched
       for, and either the matching questions/answers or the "no answer"
       message is printed.

    The exception is not re-raised, so the wrapper returns ``None`` in that
    case.

    Args:
        your_code: The callable to wrap.

    Returns:
        The wrapping function; it keeps ``your_code``'s name and docstring.
    """
    import functools

    # functools.wraps keeps __name__/__doc__ of the decorated function.
    @functools.wraps(your_code)
    def PyBreak_internal(*args, **kwargs):
        try:
            return your_code(*args, **kwargs)
        except Exception as err:
            import traceback, sys
            traceback.print_exc(file=sys.stderr)
            exception = str(err)
            if input('\n[PyBreak] Get help from stackoverflow? [y/n]: ').lower() == 'y':
                print('-' * 80, end='\n')
                read_data()
                matches = get_answers(exception)
                if isinstance(matches, str):
                    # get_answers returns the DEFAULT_ANSWER string when
                    # nothing matched; passing it to beautify_print crashed
                    # on ``.keys()``. Leading spaces + '\r' overwrite the
                    # progress line.
                    print(" " * 40 + "\r" + matches)
                else:
                    print(" " * 40 + "\rHere's some content from StackOverflow")
                    print('-' * 80, end='\n\n')
                    beautify_print(matches)
            else:
                print('Good luck with your error!')
    return PyBreak_internal