-
Notifications
You must be signed in to change notification settings - Fork 0
/
parseHTML.py
58 lines (50 loc) · 2.02 KB
/
parseHTML.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import csv
import string
import sys
# Read both saved pages fully into memory.  The ``with`` blocks close each
# file handle as soon as its contents have been read instead of leaving
# that to the garbage collector.
with open('continuity.cgi', 'r') as continuity_file:
    continuity_html = continuity_file.read()
with open('nba_wins.html', 'r') as wins_file:
    wins_html = wins_file.read()
# Three-letter team codes.  The continuity parser pairs table cells with
# these codes by position, so the order of this list is significant.
teams = [
    "ATL", "BOS", "CHA", "CHI", "CLE", "DAL", "DEN", "DET", "GSW", "HOU",
    "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "BRK", "NOP", "NYK",
    "OKC", "ORL", "PHI", "PHO", "POR", "SAC", "SAS", "TOR", "UTA", "WAS",
]

# Season labels such as '2016-17', newest first, from 2016-17 down to
# 1951-52: the starting year followed by the last two digits of the next.
season = [str(y) + '-' + str(y + 1)[2:] for y in range(2016, 1950, -1)]
# Build data[season_label][team_code] = [continuity_percentage] by walking
# every '</td>' in continuity_html.  Each closing tag counts as one table
# cell: cells are paired with ``teams`` in order, and after len(teams)
# cells the walk moves on to the next entry of ``season``.  A cell whose
# text does not end in '%' still advances the team counter but stores
# nothing.
# NOTE(review): this assumes every table row holds exactly one <td> per
# team, in the same order as ``teams``, with the newest season first --
# confirm against the saved page.
data = {}
cell_marker = '</td>'
team_idx = 0
season_idx = 0
cell_end = continuity_html.find(cell_marker)
while cell_end != -1:
    # ``cell_end > 0`` stops index -1 from wrapping around to the last
    # character of the document when the marker sits at offset 0.
    if cell_end > 0 and continuity_html[cell_end - 1] == '%':
        # Walk left over the digits in front of the '%' so that values of
        # any width ("5%", "77%", "100%") are captured whole, rather than
        # always taking exactly three characters.
        value_start = cell_end - 1
        while value_start > 0 and continuity_html[value_start - 1].isdigit():
            value_start -= 1
        percentage = continuity_html[value_start:cell_end]
        data.setdefault(season[season_idx], {})[teams[team_idx]] = [percentage]
    team_idx += 1
    if team_idx == len(teams):
        # A full row of cells has been consumed; continue with the next
        # season label.
        season_idx += 1
        team_idx = 0
    # Resume the search after this marker instead of re-slicing the whole
    # document on every iteration.
    cell_end = continuity_html.find(cell_marker, cell_end + len(cell_marker))
# Append each team's win total to the matching data[season][team] list.
# For every occurrence of the season marker in wins_html, the seven
# characters around the next '-' (four before it, two after it) are taken
# as the season label, and the text from there to the end of that line is
# scanned for team codes.
season_string = 'data-stat="season"'
marker_pos = wins_html.find(season_string)
while marker_pos != -1:
    label_search_start = marker_pos + len(season_string)
    dash = wins_html.find('-', label_search_start)
    if dash == -1:
        # No further season label exists; stop rather than slice with -1.
        break
    # Clamp the label start so it never reaches back into the marker text.
    season_label = wins_html[max(dash - 4, label_search_start):dash + 3]
    row_start = dash + 3
    if season_label in data:
        line_end = wins_html.find('\n', row_start)
        if line_end == -1:
            line_end = len(wins_html)
        row = wins_html[row_start:line_end]
        season_entry = data[season_label]
        for c, ch in enumerate(row):
            if c + 6 >= len(row):
                # Too close to the end of the line for a win total to
                # follow; no later position can have one either.
                break
            if not ch.isupper():
                continue
            code = row[c:c + 3]
            if code not in season_entry:
                continue
            # The win total is read from a fixed offset: its first
            # character sits six positions after the start of the team
            # code, optionally followed by a second digit.
            # NOTE(review): presumably this matches markup of the form
            # data-stat="ATL" >43 -- confirm against the saved page.
            wins = row[c + 6]
            if row[c + 7:c + 8].isdigit():
                wins += row[c + 7]
            season_entry[code].append(wins)
    # Continue after the label just handled, without copying the remainder
    # of the document.
    marker_pos = wins_html.find(season_string, row_start)
# Write one '<TEAM>_data.csv' per team with the columns
# year, team, continuity, wins.
#
# The csv module needs a binary-mode file on Python 2 but a text-mode file
# opened with newline='' on Python 3, so choose the opener once up front.
if sys.version_info[0] >= 3:
    def _open_csv(path):
        return open(path, 'w', newline='')
else:
    def _open_csv(path):
        return open(path, 'wb')

for team in teams:
    filename = team + '_data.csv'
    with _open_csv(filename) as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["year", "team", "continuity", "wins"])
        # Sorted so the row order is the same on every run; the labels
        # start with a four-digit year, so this is chronological.
        for year in sorted(data):
            record = data[year].get(team)
            # Season '2016-17' is always left out.  A row is written only
            # when the record holds exactly two values: the continuity
            # percentage and a single win total.
            if year != '2016-17' and record is not None and len(record) == 2:
                writer.writerow([year, team, record[0], record[1]])