# scrapper.py
import json
import logging
import requests
from bs4 import BeautifulSoup, Comment
from typing import List, Dict
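
# Scrapes bracket results from a RoboCore event page
# (https://events.robocore.net/<event_id>/brackets/<category_id>), pairs each
# match's winner and loser, and writes the collected matches to a JSON file.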


def organize_matches(matches: List[Dict]) -> Dict[str, List[Dict]]:
    """Split the scraped matches into losers-bracket and winners-bracket lists."""
    robot_losses = {}
    losers_bracket = []
    winners_bracket = []

    # Count how many times each robot appears as a loser.
    for match in matches:
        loser = match["loser"]["robot"]
        robot_losses[loser] = robot_losses.get(loser, 0) + 1

    # Matches go to the losers bracket until a loser's accumulated loss count
    # reaches three; every match from that point on is treated as a
    # winners-bracket match.
    is_losers = True
    for match in matches:
        loser = match["loser"]["robot"]
        if robot_losses[loser] == 3:
            is_losers = False
        if is_losers:
            losers_bracket.append(match)
        else:
            winners_bracket.append(match)
        robot_losses[loser] = robot_losses.get(loser, 0) + 1
    losers_bracket.reverse()
    return {
        "losers_bracket": losers_bracket,
        "winners_bracket": winners_bracket,
    }
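
# Illustrative shape of the mapping returned by organize_matches (team and
# robot names below are placeholders, not real event data):
#
# {
#     "losers_bracket": [
#         {"winner": {"team": "Team A", "robot": "Robot A"},
#          "loser": {"team": "Team B", "robot": "Robot B"}},
#         ...
#     ],
#     "winners_bracket": [...],
# }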


def parse_match_details(
    data_src: str, winner_name: str, event_id: str, category_id: str
) -> dict:
    """Fetch a single match page and return its winner and loser (team and robot)."""
    base_url: str = "https://events.robocore.net"
    url: str = f"{base_url}/{event_id}/brackets/{category_id}"
    match_url = f"{url}{data_src}"
    match_response = requests.get(match_url)
    match_soup = BeautifulSoup(match_response.text, "html.parser")
    team_names = [
        team.get_text(strip=True)
        for team in match_soup.find_all("div", {"class": "team_name"})
    ]
    team_robots = [
        robot.get_text(strip=True)
        for robot in match_soup.find_all("div", {"class": "team_robot"})
    ]
    if len(team_names) < 2 or len(team_robots) < 2:
        raise ValueError("Incomplete team information in the match.")
    # The winner's robot name from the bracket link identifies which side won.
    winner_index = 0 if winner_name in team_robots[0] else 1
    return {
        "winner": {
            "team": team_names[winner_index],
            "robot": team_robots[winner_index],
        },
        "loser": {
            "team": team_names[1 - winner_index],
            "robot": team_robots[1 - winner_index],
        },
    }
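
# The bracket page is assumed to render results inside <table id="tblBracket">,
# with each match link marking the winning robot's name with ">>" or "<<"
# arrows; parse_brackets below relies on that markup.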


def parse_brackets(soup: BeautifulSoup, event_id: str, category_id: str) -> dict:
    """Collect every decided match from the bracket table and organize it."""
    try:
        main_table = soup.find("table", {"id": "tblBracket"})
        match_links = main_table.find_all("a", {"data-src": True})
        match_links.pop(0)
        matches = []
        for link in match_links:
            # Skip matches that have no winner marker yet.
            if ">>" not in link.get_text(strip=True) and "<<" not in link.get_text(
                strip=True
            ):
                continue
            winner_name = (
                link.get_text(strip=True).replace(">>", "").replace("<<", "").strip()
            )
            data_src = link["data-src"]
            match_data = parse_match_details(
                data_src, winner_name, event_id, category_id
            )
            matches.append(match_data)
        organized_brackets = organize_matches(matches)
        return organized_brackets
    except Exception as e:
        logging.error(f"An error occurred while parsing brackets: {e}")
        raise RuntimeError("Failed to parse brackets") from e


def fetch_event_data(event_id: str, category_id: str) -> BeautifulSoup:
    """Download the bracket page and return its <body> with comments, styles and scripts stripped."""
    base_url: str = "https://events.robocore.net"
    url: str = f"{base_url}/{event_id}/brackets/{category_id}"
    try:
        response = requests.get(url)
        response.raise_for_status()
        if "No data found" in response.text:
            logging.warning(
                f"No data found for event {event_id} in category {category_id}"
            )
            raise FileNotFoundError(
                f"No data found for event {event_id} in category {category_id}"
            )
    except requests.exceptions.HTTPError as e:
        logging.error(
            f"Failed to fetch data for event {event_id} in category {category_id}: {e}"
        )
        raise RuntimeError(f"Failed to fetch data: {e}") from e
    soup = BeautifulSoup(response.text, "html.parser")
    # Remove HTML comments plus <style> and <script> tags before parsing the bracket.
    comments = soup.find_all(string=lambda text: isinstance(text, Comment))
    for comment in comments:
        comment.extract()
    for unwanted_tag in ["style", "script"]:
        for tag in soup.find_all(unwanted_tag):
            tag.extract()
    return soup.body


def main(event_id: str, category_id: str):
    """Scrape the event bracket and save the ordered matches to a JSON file."""
    try:
        soup = fetch_event_data(event_id, category_id)
        brackets = parse_brackets(soup, event_id, category_id)
        # Move the final (last winners-bracket match) to the end of the output order.
        final = brackets["winners_bracket"].pop()
        matches = brackets["winners_bracket"] + brackets["losers_bracket"]
        matches.append(final)
        parsed = {
            "matches": [
                [match["winner"]["team"], match["loser"]["team"]] for match in matches
            ],
            "matches-robots": [
                [match["winner"]["robot"], match["loser"]["robot"]] for match in matches
            ],
        }
        output_file = f"{event_id}_{category_id}_matches.json"
        with open(output_file, "w", encoding="utf-8") as file:
            json.dump(parsed, file, ensure_ascii=False, indent=4)
        print(f"Matches saved to {output_file}")
    except Exception as e:
        logging.error(f"An error occurred: {e}")


if __name__ == "__main__":
    event_id = "rcbr-2024"
    category_id = "21"
    main(event_id, category_id)