-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWebScrapper.py
97 lines (68 loc) · 2.41 KB
/
WebScrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from bs4 import BeautifulSoup
import requests
import re
from config import username, password, login_url, cadeiras_ref
# Module-level HTTP session created at import time.
# NOTE(review): encontrar_estudante opens its own session, so nothing visible
# in this file appears to use this one — confirm with importers before removing.
session = requests.Session()
def extract_hidden_lt(text):
    """Return the value of the hidden ``lt`` form field found in *text*.

    Only the exact markup ``<input type="hidden" name="lt" value="..." />``
    is recognised, and the first occurrence wins. Returns ``None`` when the
    field is not present.
    """
    found = re.search(r'<input type="hidden" name="lt" value="(.*?)" />', text)
    return found.group(1) if found else None
def extract_hidden_execution(text):
    """Pull the hidden ``execution`` form field value out of an HTML string.

    The field must appear exactly as
    ``<input type="hidden" name="execution" value="..." />``; the value of
    the first such tag is returned, or ``None`` if there is none.
    """
    hit = re.search(
        r'<input type="hidden" name="execution" value="(.*?)" />',
        text,
    )
    if hit is None:
        return None
    return hit.group(1)
def encontrar_estudante(link, numero_estudante):
    """Log in, fetch *link* and format the table row of one student.

    The function posts the configured credentials (plus the hidden ``lt`` and
    ``execution`` fields read from the login page) to ``login_url``, then
    downloads *link* with the same session and searches every
    ``<table class="tab_complex">`` for a ``<td>`` whose stripped text equals
    *numero_estudante*.

    Args:
        link: URL of the page containing the result tables.
        numero_estudante: Student number to look for. It is converted with
            ``str()`` and stripped before comparison, so an ``int`` works too.

    Returns:
        One line per cell of the student's row. The first two cells are
        rendered as ``"<header>: <value>"``; every later cell as
        ``"<header>: <value>/<total>"``, where ``<total>`` comes from the
        last row of the same table, shifted one column to the left.
        ``'Estudante não encontrado'`` is returned when the login POST does
        not answer with status 200 or when no table contains the student.
    """
    alvo = str(numero_estudante).strip()

    # The context manager closes the session (and its pooled connections) on
    # every exit path, including exceptions raised by the requests below.
    with requests.Session() as s:
        login_page = s.get(login_url).text
        # The hidden login-form tokens are deliberately not printed: they are
        # part of the authentication exchange and do not belong on stdout.
        payload = {
            'username': username,
            'password': password,
            'lt': extract_hidden_lt(login_page),
            'execution': extract_hidden_execution(login_page),
            '_eventId': 'submit',
            'submit': 'LOGIN',
        }
        r = s.post(login_url, data=payload)
        if r.status_code != 200:
            return 'Estudante não encontrado'

        html = s.get(link).text
        print(link)

    soup = BeautifulSoup(html, 'html.parser')
    for tabela in soup.find_all('table', class_='tab_complex'):
        # `string=` is the current name of the filter formerly called `text=`.
        celula = tabela.find(
            'td',
            string=lambda conteudo: conteudo and conteudo.strip() == alvo,
        )
        if celula is None:
            continue

        linha = celula.find_parent('tr')
        # Use the table that directly owns the row; with nested tables this
        # may be an inner table rather than the `tab_complex` one.
        dona = linha.find_parent('table')

        estudante = [td.text.strip() for td in linha.find_all('td')]
        descricao = [th.text.strip() for th in dona.find_all('th')]
        ultima_linha = dona.find_all('tr')[-1]
        total = [td.text.strip() for td in ultima_linha.find_all('td')]

        # NOTE(review): this indexing raises IndexError if the table has fewer
        # <th> cells than the student's row has <td> cells, or if the last row
        # has fewer than len(row) - 1 cells — confirm the page layout.
        partes = []
        for i, valor in enumerate(estudante):
            if i < 2:
                partes.append(descricao[i] + ': ' + valor + '\n')
            else:
                # The totals row is offset by one column relative to the
                # student's row, hence the i - 1.
                partes.append(
                    descricao[i] + ': ' + valor + '/' + total[i - 1] + '\n'
                )
        return ''.join(partes)

    return 'Estudante não encontrado'