diff --git a/get_chat.py b/get_chat.py index 6e62e54..8a57774 100644 --- a/get_chat.py +++ b/get_chat.py @@ -9,15 +9,15 @@ def id_to_chat(videoId: str) -> list: return chat_list def id_to_chat_split(videoId: str) -> list: + """ return a list of dictionaries consisting of the chat message, user, timestamp, and moderator status""" - + chat = ChatDownloader().get_chat(f'https://www.youtube.com/watch?v={videoId}') chat_list = [] for message in chat: - msg = { - - } + + msg = { } msg['time'] = message['time_text'] msg['text'] = message['message'] diff --git a/get_transcript.py b/get_transcript.py index 052c94f..b26ef38 100644 --- a/get_transcript.py +++ b/get_transcript.py @@ -1,17 +1,9 @@ from youtube_transcript_api import YouTubeTranscriptApi -from youtube_transcript_api.formatters import JSONFormatter - -video_id ='Vi231_PujYI' -video_id = '4WBbrxZguqk' # lecture 7 21T1 def id_to_transcript(video_id: str) -> list: - transcript = YouTubeTranscriptApi.get_transcript(video_id) - # for message in transcript: - # print(message) - text = [message['text'] for message in transcript] + """ given a YouTube video ID return a list of dictionaries + containing the transcript text and the timestamp for the speech """ - return text - -if __name__ == "__main__": - text = id_to_transcript(video_id) - print(text) + transcript = YouTubeTranscriptApi.get_transcript(video_id) + + return [message['text'] for message in transcript] diff --git a/get_video.py b/get_video.py index 7cc6b03..2a2d2c0 100644 --- a/get_video.py +++ b/get_video.py @@ -3,8 +3,8 @@ from errors import CouldNotGetName def id_to_name(video_id) -> str: + """ get the title of a YouTube video given its video id """ vid = Video.getInfo(video_id, mode=ResultMode.json) - try: vid = json.loads(vid) except: diff --git a/magic.py b/magic.py index 1765ae8..f6323d8 100644 --- a/magic.py +++ b/magic.py @@ -1,60 +1,48 @@ -# lecture repeats the question +# matches questions from live chat with responses from lecturer/speaker + from fuzzywuzzy import fuzz -import get_transcript -import get_chat -def magic(transcript, chat): - keywords = ["ask", "questions", "ask", "question"] - qna = [] +def is_responding_to_chat(transcript_text, i): + """ determines if speech from the transcript is in response to the chat""" - for i in range(len(transcript) - 20): - line = transcript[i] - line_set = set(line.split(" ")) - keywords_set = set(keywords) - - if not keywords_set.isdisjoint(line_set): - question = transcript[i:i+3] - answer = transcript[i+3:i+20] - - # print(f'Question: {" ".join(question)}\n') - # print(f'Answer: {" ".join(answer)}\n') - - for comment in chat: - time = comment.split(" | ")[0] - potential_question = comment.split(" | ")[1] - if (fuzz.ratio(question, potential_question) > 48): - print(f'{potential_question}') - print(f'Question: {" ".join(question)}') - print(f'Answer: {" ".join(answer)}\n') - qna.append( - { - "question": " ".join(question), - "answer": " ".join(answer), - "time": time - } - ) - - return qna - -if __name__ == '__main__': - with open("transcript.txt") as f: - transcript = [line.strip() for line in f.readlines()] - - with open("chat.txt") as g: - chat = [line.strip() for line in g.readlines()] - - videoId = "4WBbrxZguqk" - transcript = get_transcript.id_to_transcript(videoId) - chat = get_chat.id_to_chat(videoId) + keywords_set = {"ask", "asking", "asks", "asked", "question", "questions", + "chat", "chats", "messages"} + + transcript_line = transcript_text[i] + transcript_line_set = set(transcript_line.split(" ")) - print(magic(transcript,chat)) + if not keywords_set.isdisjoint(transcript_line_set): + speech = transcript_text[i:i+3] + answer = transcript_text[i+3:i+20] + return (speech, answer) + + return (None, None) - marc = "what about alphabetical ordering of matt and mark" - chat = "What about alphabetical ordering of matt and marc" - # print(fuzz.ratio(marc, chat)) +def magic(transcript, chat): + """ matches questions from live chats with answers from lecture video""" + qna = {} - chat2 = "all of the students will do final exam at the same time?" + for i in range(len(transcript) - 20): - marc = "is it just returning one zero or negative one or does it return the magnitude of the difference" - chat = "Is it just returning 1/0/-1 or does it return the magnitude of the diffference?" - # print(fuzz.ratio(marc, chat)) + speech, answer = is_responding_to_chat(transcript, i) + if speech == None: + continue + + for comment in chat: + + potential_question = comment['text'] + answer_time = comment['time'] + + # if the lecturer repeats or paraphrases the question + if (fuzz.ratio(speech, potential_question) > 50): + if potential_question not in qna: + qna[potential_question] = { + "question": potential_question, + "answer": " ".join(answer), + "time": answer_time, + "moderator_response": False, + } + break + + qna = list(qna.values()) + return qna \ No newline at end of file diff --git a/match_chat.py b/match_chat.py index 598c6a5..ded2d13 100644 --- a/match_chat.py +++ b/match_chat.py @@ -1,4 +1,5 @@ -""" MATCH STUDENT QUESTIONS FROM THE LIVE CHAT WITH RESPONSES FROM MODERATORS """ +# matches questions from live chat with responses from moderators + import get_chat import re from fuzzywuzzy import fuzz @@ -6,9 +7,7 @@ def get_unique_users(chat): """ get a dictionary of unique users from the chat """ - unique_users = { - - } + unique_users = { } for message in chat: unique_users[message['user']] = message['privilege'] @@ -17,6 +16,7 @@ def get_unique_users(chat): def is_question(message, unique_users, q): """ returns the message if it is a question and None if it is not""" + line = get_tagged_user(message['text'], unique_users)[1] if '?' in line: @@ -36,8 +36,9 @@ def is_question(message, unique_users, q): def find_question_expanded(message, unique_users, q): - - (user_tagged, line) = get_tagged_user(message['text'], unique_users) + """ finding potential questions with extra checks""" + + user_tagged = get_tagged_user(message['text'], unique_users)[0] # check if this message is an answer to another student's question if user_tagged != None and unique_users[user_tagged] == 'member': @@ -51,12 +52,16 @@ def find_question_expanded(message, unique_users, q): def get_tagged_user(line, unique_users): """ given a chat message, return the user tagged""" tagged_user = None + for user in unique_users: + tagged_user = re.search(f"@{user}\s*", line) + if tagged_user != None: tagged_user = tagged_user.group(0).replace("@", "").strip() line = line.replace(f"@{user} ", "") break + return (tagged_user, line) def find_corresponding_question(chat, tagged_user, prev_mod_index, index, q, unique_users): @@ -73,8 +78,10 @@ def find_corresponding_question(chat, tagged_user, prev_mod_index, index, q, uni # expanding the search space ft. some additional checks for i in range(prev_mod_index[1], prev_mod_index[0], -1): message = chat[i] + if message['user'] == tagged_user: question = find_question_expanded(message, unique_users, q) + if question != None: return question @@ -112,12 +119,14 @@ def match_chat(chat): else: qna[question] = { "question": question, - "answer": answer, + "answer": f'(MODERATOR) {answer}', "time": message['time'], "user": tagged_user, - "moderator": message['user'] + "moderator": message['user'], + "moderator_response": True } - + + # updates qna search space prev_mod_index = (prev_mod_index[0], index) qna = list(qna.values()) diff --git a/server.py b/server.py index 6f7120c..e1f7545 100644 --- a/server.py +++ b/server.py @@ -51,10 +51,11 @@ def get_title(): @APP.route("/magic", methods=["get"]) def get_magic(): video_id = request.args.get("id") - try: - qna = magic(id_to_transcript(video_id), id_to_chat(video_id)) - qna.extend(match_chat(id_to_chat_split(video_id))) + transcript = id_to_transcript(video_id) + live_chat = id_to_chat_split(video_id) + qna = magic(transcript, live_chat) + qna.extend(match_chat(live_chat)) except TranscriptsDisabled: raise APITranscriptError("Transcripts have been disabled on this video.") except CouldNotRetrieveTranscript as e: