-
Notifications
You must be signed in to change notification settings - Fork 19
/
analyze_citycollege.py
38 lines (30 loc) · 1.07 KB
/
analyze_citycollege.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/python
# -*- coding: utf-8 -*-
from BeautifulSoup import BeautifulSoup
import re
from collections import defaultdict
# Matches a class meeting time such as "9:30 - 10:45 AM" or "11 - 12 NOON".
# Group 1 is the start time (no suffix); group 3 is the end time together
# with its AM/PM/NOON suffix.
find_cuny_hours = re.compile(r'(\d{1,2}(:\d\d)?)\s*-\s*((\d{1,2}(:\d\d)?)\s*(([AP]M)|NOON))')

# Parse the saved registrar page. The context manager makes sure the file
# handle is closed instead of being leaked for the rest of the run.
with open("citycollege/registrar.html") as registrar_page:
    soup = BeautifulSoup(registrar_page)

# Parallel lists: starts[i] and ends[i] describe the same class meeting.
starts = []
ends = []

coursetables = soup.findAll("table", {"class": "coursetable"})
for course in coursetables:
    # Pre-filter to text that contains at least one HH:MM style time.
    classtimes = course.findAll("td", text=re.compile(r'\d{1,2}:\d\d'))
    for c in classtimes:
        m = find_cuny_hours.search(c)
        if m is None:
            # Dump the offending text so unexpected formats can be inspected.
            print("No match found")
            print(c)
            continue
        start = m.group(1)
        end = m.group(3)
        starts.append(start)
        ends.append(end)
        print("%s %s" % (start, end))

print("found %(count)d total class times" % {"count": len(starts)})

# dividing up the classes we've found into different timeslots to count them
timeslots = defaultdict(int)
for start, end in zip(starts, ends):
    timeslots[start + "-" + end] += 1

# Show the ten most common "start-end" timeslots, most frequent first.
print(sorted(timeslots.items(), key=lambda x: x[1], reverse=True)[:10])