-
Notifications
You must be signed in to change notification settings - Fork 1
/
scraper.py
91 lines (81 loc) · 2.37 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from contextlib import closing
from datetime import date
import re
import time
from urllib2 import urlopen

from bs4 import BeautifulSoup

from team import Team
# Common URL prefix for the pages fetched below; base_soup() and
# records_soup() append the year from year() plus a page suffix
# ('.html' or '_games.html').
BASE_URL="http://www.basketball-reference.com/leagues/NBA_"
def year(when=None):
    """Return the year number used to build the page URLs.

    From September onward the result is the following calendar year;
    for January through August it is the calendar year itself.

    Args:
        when: Optional ``datetime.date`` to evaluate instead of the current
            date. Defaults to ``date.today()``.

    Returns:
        int: ``when.year + 1`` if ``when.month >= 9``, else ``when.year``.
    """
    if when is None:
        when = date.today()
    # Months 9-12 roll over to the next calendar year.
    return when.year + 1 if when.month >= 9 else when.year
def base_soup():
    """Download ``BASE_URL + <year()> + '.html'`` and parse it.

    Returns:
        BeautifulSoup: The downloaded page parsed with the "lxml" parser.
    """
    url = BASE_URL + str(year()) + '.html'
    # urllib2 responses are not context managers; closing() makes sure the
    # connection is released even if read() raises.
    with closing(urlopen(url)) as response:
        html = response.read()
    return BeautifulSoup(html, "lxml")
def records_soup():
    """Download ``BASE_URL + <year()> + '_games.html'`` and parse it.

    Returns:
        BeautifulSoup: The downloaded page parsed with the "lxml" parser.
    """
    url = BASE_URL + str(year()) + '_games.html'
    # urllib2 responses are not context managers; closing() makes sure the
    # connection is released even if read() raises.
    with closing(urlopen(url)) as response:
        html = response.read()
    return BeautifulSoup(html, "lxml")
def all_wins_losses():
    """Collect win/loss totals for every name in ``Team.team_names``.

    For each team name, the first text node matching it (the name is used as
    a regular expression) is located in the page returned by ``base_soup()``.
    The next two ``<td>`` cells after that node are read as wins and losses.

    Returns:
        dict: Maps each team name to ``{'W': int, 'L': int, '%': float}``,
        where ``'%'`` is W / (W + L), or 0.0 when W + L is zero.
    """
    soup = base_soup()
    team_stats = {}
    for team in Team.team_names:
        # Search for the team's text node once and walk forward from it.
        wins_cell = soup.find(text=re.compile(team)).find_next("td")
        wins = int(wins_cell.string)
        losses = int(wins_cell.find_next("td").string)
        played = wins + losses
        team_stats[team] = {
            'W': wins,
            'L': losses,
            # Guard against ZeroDivisionError when no games are recorded yet.
            '%': float(wins) / played if played else 0.0,
        }
    return team_stats
def process_calendar():
    """Build a list of game entries from the page returned by records_soup().

    The ``.string`` of every ``<td>`` following the first ``<tbody>`` is
    gathered into one flat list, which is then read in strides of eight cells
    (presumably one table row per game -- confirm against the page layout).

    Returns:
        list: One dict per stride with keys ``'Date'``, ``'P?'``, ``'W'`` and
        ``'L'``. ``'P?'`` is True when the cell at offset 3 is truthy; in that
        case the cells at offsets 2 and 4 are assigned to ``'W'``/``'L'`` by
        comparing the integer values at offsets 3 and 5. Otherwise offset 2
        is stored under ``'W'`` and offset 4 under ``'L'``.
    """
    cells = [td.string for td in records_soup().find("tbody").find_all_next("td")]
    schedule = []
    for start in range(0, len(cells), 8):
        game = {'Date': cells[start]}
        if not cells[start + 3]:
            # No value at offset 3: keep the two names in page order.
            game['P?'] = False
            game['W'], game['L'] = cells[start + 2], cells[start + 4]
        else:
            game['P?'] = True
            if int(cells[start + 3]) > int(cells[start + 5]):
                game['W'], game['L'] = cells[start + 2], cells[start + 4]
            else:
                game['L'], game['W'] = cells[start + 2], cells[start + 4]
        schedule.append(game)
    return schedule
def today():
    """Return the current date as text such as ``"Mon, Jan 5, 2015"``.

    Weekday and month abbreviations come from fixed English tables, and the
    day of the month is formatted with ``%d`` (no zero padding).
    """
    weekday_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = (
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
    )
    now = date.today()
    return "%s, %s %d, %d" % (
        weekday_names[now.weekday()],  # weekday() is 0 for Monday
        month_names[now.month - 1],    # month is 1-based
        now.day,
        now.year,
    )
if __name__ == "__main__":
    # When run as a script, print today's formatted date. The call form of
    # print behaves the same as the statement on Python 2 and is also valid
    # syntax on Python 3.
    print(today())