From dad50f13d1d39158e620f08699fd571c0d1dd3de Mon Sep 17 00:00:00 2001
From: Oz Gitelson
Date: Sat, 14 Oct 2023 17:18:08 -0400
Subject: [PATCH] create basic coursetable scraper

---
 backend/coursetable_scraper.py | 38 +++++++++++++++++++++++
 backend/display_courses.py     | 48 +++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 backend/coursetable_scraper.py
 create mode 100644 backend/display_courses.py

diff --git a/backend/coursetable_scraper.py b/backend/coursetable_scraper.py
new file mode 100644
index 0000000..1e75895
--- /dev/null
+++ b/backend/coursetable_scraper.py
@@ -0,0 +1,38 @@
+"""Download Coursetable catalog JSON for a range of years into courses.json."""
+import requests
+import json
+import datetime
+import time
+
+
+#TODO change these to env variables
+cookies = {
+    'session': 'enter session',
+    'session.sig': 'etner session.sig',
+}
+
+# response = requests.get('https://api.coursetable.com/api/static/catalogs/202301.json', cookies=cookies)
+
+# Read the clock once so both ends of the range use the same year.
+current_year = datetime.datetime.now().year
+
+# course_dic[year][season] holds the parsed catalog JSON for that term.
+course_dic = {}
+for year in range(current_year - 6, current_year + 6 + 1):
+    course_dic[year] = {}
+    for season in range(1, 4):
+        data_url = f'https://api.coursetable.com/api/static/catalogs/{year}0{season}.json'
+        response = requests.get(data_url, cookies=cookies)
+
+        # Treat every non-200 status (not just 404) as unavailable so an error
+        # body cannot crash json.loads and discard what was already scraped.
+        if response.status_code != 200:
+            print(f'unable to access {year} {season}')
+            continue
+
+        print(f'scraping {year} {season}')
+        course_dic[year][season] = json.loads(response.text)
+        time.sleep(1)
+
+with open('courses.json', 'w') as outfile:
+    json.dump(course_dic, outfile)
diff --git a/backend/display_courses.py b/backend/display_courses.py
new file mode 100644
index 0000000..9d1c406
--- /dev/null
+++ b/backend/display_courses.py
@@ -0,0 +1,48 @@
+"""Interactive lookup of courses stored in courses.json."""
+import json
+import argparse
+
+
+# Term assumed when the user enters only a department and a number.
+DEFAULT_YEAR = '2023'
+DEFAULT_SEASON = '3'
+
+with open('courses.json', 'r') as infile:
+    raw_courses = json.load(infile)
+
+# Re-index the catalog as courses[year][season][dep][num] = course so a
+# lookup is a chain of dict accesses instead of a scan over every course.
+courses = {}
+for year, seasons in raw_courses.items():
+    courses[year] = {}
+    for season, catalog in seasons.items():
+        courses[year][season] = {}
+        for course in catalog:
+            for code in course["all_course_codes"]:
+                parts = code.split(' ')
+                if len(parts) < 2:
+                    # Code has no number part to index by; skip it.
+                    continue
+                dep, num = parts[0], parts[1]
+                courses[year][season].setdefault(dep, {})[num] = course
+
+while True:
+    request = input('enter course:\n').split()
+    if len(request) == 2:
+        year, season = DEFAULT_YEAR, DEFAULT_SEASON
+        dep, c_num = request
+    elif len(request) >= 4:
+        year, season, dep, c_num = request[:4]
+    else:
+        # Neither 2 nor 4+ fields: report it rather than raising IndexError.
+        print('invalid course')
+        continue
+
+    try:
+        course = courses[year][season][dep.upper()][c_num]
+        print(f'{course["title"]}:')
+        print(f'{course["description"]}\n')
+        print(f'rating: {course["average_rating"]}')
+        print(f'difficulty: {course["average_workload"]}\n')
+    except KeyError:
+        print('invalid course')