-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrmp.py
167 lines (130 loc) · 5.75 KB
/
rmp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
Copyright (c) 2016 Stepan Fedorko-Bartos, Ceegan Hale
Under MIT License - https://github.com/Step7750/ScheduleStorm/blob/master/LICENSE.md
This file is a resource for Schedule Storm - https://github.com/Step7750/ScheduleStorm
"""
import threading
import requests
import pymongo
import time
import logging
# Module-level logger (named "RMP") used by the RateMyProfessors scraper for all of its messages
log = logging.getLogger("RMP")
class RateMyProfessors(threading.Thread):
    """
    Thread that periodically fetches teacher ratings from RateMyProfessors for a list of
    school ids and upserts them into the RateMyProfessors collection of the ScheduleStorm db
    """

    # Seconds to wait on the RMP API before giving up, so a stalled connection
    # can not block the scraper thread forever
    REQUEST_TIMEOUT = 30

    # Maps the RMP API field names to the key names stored in the DB
    # NOTE(review): "teacherdepartment_s" is mapped here but is not listed in the "fl"
    # parameter of the API url, so presumably "department" is never populated - confirm
    MAP_KEYS = {
        "pk_id": "id",
        "averageratingscore_rf": "rating",
        "total_number_of_ratings_i": "numratings",
        "teacherfirstname_t": "firstname",
        "teachermiddlename_t": "middlename",
        "teacherlastname_t": "lastname",
        "teacherdepartment_s": "department",
        "averageeasyscore_rf": "easyrating"
    }

    def __init__(self, rmpids, interval):
        """
        Constructor for RateMyProfessors to set the RMP schools to request and the interval

        Connects to MongoDB and ensures the indexes on the RateMyProfessors collection exist

        :param rmpids: **list** List of rmp ids to scrape for
        :param interval: **int** Seconds to wait in between scraping
        :return:
        """
        threading.Thread.__init__(self)

        # Pass in a list that contains the ids to fetch
        self.ids = rmpids

        # The amount of seconds to wait before scraping RMP again
        self.interval = interval

        # Establish db connection
        self.db = pymongo.MongoClient().ScheduleStorm

        log.info("Ensuring MongoDB indexes exist")

        self.db.RateMyProfessors.create_index(
            [("school", pymongo.ASCENDING)]
        )

        self.db.RateMyProfessors.create_index(
            [("id", pymongo.ASCENDING),
             ("school", pymongo.ASCENDING)],
            unique=True
        )

    @staticmethod
    def _buildApiUrl(schoolid):
        """
        Builds the RMP search url that lists the teachers of the specified school id

        :param schoolid: **int** RMP ID that defines this school
        :return: **str** Url to request
        """
        # The query string is already percent-encoded, so it is concatenated verbatim
        return "http://search.mtvnservices.com/typeahead/suggest/" \
               "?q=schoolid_s%3A" + str(schoolid) + \
               "&defType=edismax" \
               "&qf=teacherfullname_t%5E1000+autosuggest" \
               "&sort=total_number_of_ratings_i+desc" \
               "&siteName=rmp" \
               "&rows=999999" \
               "&start=0" \
               "&fl=pk_id+teacherfirstname_t+teacherlastname_t+total_number_of_ratings_i+averageratingscore_rf+" \
               "teachermiddlename_t+averageeasyscore_rf"

    @classmethod
    def _remapTeacher(cls, teacher):
        """
        Converts a single RMP teacher dict into the dict that is stored in the DB

        Keys that are not in MAP_KEYS are dropped and name fields are stripped of whitespace

        :param teacher: **dict** Single teacher entry of the RMP response
        :return: **dict** Teacher with the remapped keys (without the "school" key)
        """
        upsertobj = {}

        for key, value in teacher.items():
            if key not in cls.MAP_KEYS:
                continue

            # Only strip values that support it, a non-string name must not abort the upsert
            if "name" in key and hasattr(value, "strip"):
                value = value.strip()

            upsertobj[cls.MAP_KEYS[key]] = value

        return upsertobj

    def getRatingsForSchool(self, schoolid):
        """
        Returns the JSON for teacher ratings for the specified school id

        :param schoolid: **int** RMP ID that defines this school (should be in its settings)
        :return: **list** Teacher ratings for schoolid, or **False** if the request failed,
                 did not return a successful status code or had an unexpected body
        """
        log.info("Obtaining RMP data for " + str(schoolid))

        apiurl = self._buildApiUrl(schoolid)

        # Get the data
        try:
            r = requests.get(apiurl, timeout=self.REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            log.critical("There was an exception while retrieving RMP data for " + str(schoolid) + " | " + str(e))
            return False

        if r.status_code != requests.codes.ok:
            # We didn't get a successful response
            return False

        # Parse it
        try:
            jsonval = r.json()
        except ValueError as e:
            log.critical("RMP returned invalid JSON for " + str(schoolid) + " | " + str(e))
            return False

        # Make sure it has the properties we want
        if isinstance(jsonval, dict):
            response = jsonval.get("response")

            if isinstance(response, dict) and "docs" in response:
                return response["docs"]

        return False

    def upsertTeachers(self, teachers, schoolid):
        """
        Upserts the teachers from schoolid into the db

        Teachers without a rating or without an id are skipped

        :param teachers: **list** RMP teacher dicts (response from getRatingsForSchool)
        :param schoolid: **int** RMP ID that defines this school (should be in its settings)
        :return:
        """
        log.info("Upserting teachers for " + str(schoolid) + " into RMP db")

        for teacher in teachers:
            # We only want to insert them if they actually have a rating
            if "averageratingscore_rf" not in teacher:
                continue

            # Remap the dict keys for the DB
            upsertobj = self._remapTeacher(teacher)

            if "id" not in upsertobj:
                # Without an id there is nothing to match on, skip this teacher only
                log.error("Skipping RMP teacher without an id for " + str(schoolid))
                continue

            upsertobj["school"] = schoolid

            # NOTE(review): matches on "id" alone although the unique index is (id, school),
            # presumably RMP ids are unique across schools - confirm
            self.db.RateMyProfessors.update_one(
                {"id": upsertobj["id"]},
                {
                    "$set": upsertobj,
                    "$currentDate": {"lastModified": True}
                },
                upsert=True
            )

        log.info("Finished adding data to DB for " + str(schoolid))

    def run(self):
        """
        Scrapes every configured school, sleeps for the interval and repeats forever

        Returns immediately without scraping when the interval is unset or not positive

        :return:
        """
        if not (self.interval and self.interval > 0):
            return

        while True:
            # Iterate through the ids and update the RMP ratings
            for schoolid in self.ids:
                try:
                    rmpdata = self.getRatingsForSchool(schoolid)

                    if rmpdata:
                        self.upsertTeachers(rmpdata, schoolid)
                    else:
                        log.error("Failed to obtain RMP data for " + str(schoolid))
                except Exception as e:
                    # A failure for one school must not stop the scraping of the others
                    log.critical("There was an error while obtaining and parsing RMP data for "
                                 + str(schoolid) + " | " + str(e))

            time.sleep(self.interval)