-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhtml_soup.py
executable file
·57 lines (45 loc) · 1.81 KB
/
html_soup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/home/sgrosu/anaconda2/bin/python
# -*- coding: utf-8 -*-
# Please note that the function 'make_request' is provided for your reference only.
# You will not be able to actually use it from within the Udacity web UI.
# Your task is to process the HTML using BeautifulSoup, extract the hidden
# form field values for "__EVENTVALIDATION" and "__VIEWSTATE" and set the appropriate
# values in the data dictionary.
# All your changes should be in the 'extract_data' function
from bs4 import BeautifulSoup
import requests
import json
# Path of the saved HTML page that is passed to extract_data() at the bottom
# of this file.
html_page = "page_source.html"
def extract_data(page):
    """Extract the hidden form field values from a saved HTML page.

    Parses the file at ``page`` with BeautifulSoup and looks up the elements
    whose ids are ``__EVENTVALIDATION`` and ``__VIEWSTATE``.

    Args:
        page: Path of the HTML file to read.

    Returns:
        A dict with the keys ``"eventvalidation"`` and ``"viewstate"``, each
        holding the ``value`` attribute of the matching element. A key keeps
        its default of ``""`` when the element is not present in the page or
        carries no ``value`` attribute.
    """
    data = {"eventvalidation": "",
            "viewstate": ""}
    # (result key, id of the hidden element that carries the value)
    hidden_fields = (("viewstate", "__VIEWSTATE"),
                     ("eventvalidation", "__EVENTVALIDATION"))

    with open(page, "r") as html:
        soup = BeautifulSoup(html, 'html.parser')

    for key, element_id in hidden_fields:
        field = soup.find(id=element_id)
        # find() returns None when no element matches; calling .get() on it
        # would raise AttributeError, so keep the "" default instead.
        if field is None:
            continue
        value = field.get('value')
        if value is not None:
            data[key] = value
    return data
def make_request(data, airport="BOS", carrier="VX", timeout=30):
    """POST the airport/carrier form and return the response body.

    Args:
        data: Dict with the keys ``"eventvalidation"`` and ``"viewstate"``,
            as returned by ``extract_data``.
        airport: Value sent as the ``AirportList`` form field.
        carrier: Value sent as the ``CarrierList`` form field.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``
            argument. Without a timeout the call can block indefinitely if
            the server never answers.

    Returns:
        The response body as text (``r.text``).

    Raises:
        KeyError: If ``data`` lacks one of the two required keys.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    eventvalidation = data["eventvalidation"]
    viewstate = data["viewstate"]

    form_fields = {
        'AirportList': airport,
        'CarrierList': carrier,
        'Submit': 'Submit',
        "__EVENTTARGET": "",
        "__EVENTARGUMENT": "",
        "__EVENTVALIDATION": eventvalidation,
        "__VIEWSTATE": viewstate,
    }
    r = requests.post("http://www.transtats.bts.gov/Data_Elements.aspx?Data=2",
                      data=form_fields,
                      timeout=timeout)
    return r.text
# Disabled self-test: it is wrapped in a string literal, so nothing inside it
# is executed. It checks the values extract_data() returns for html_page.
'''
def test():
data = extract_data(html_page)
assert data["eventvalidation"] != ""
assert data["eventvalidation"].startswith("/wEWjAkCoIj1ng0")
assert data["viewstate"].startswith("/wEPDwUKLTI")
test()
'''
# Disabled example of posting the extracted values with make_request():
#data = extract_data(html_page)
#make_request(data)
# Runs the extraction whenever this module is executed or imported; the
# returned dict is discarded.
extract_data(html_page)