-
Notifications
You must be signed in to change notification settings - Fork 690
/
Copy path
35_scrape_hacktoberfest_events.py
55 lines (44 loc) · 1.56 KB
/
35_scrape_hacktoberfest_events.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import requests
import pandas
from bs4 import BeautifulSoup

# Scrape the list of past Hacktoberfest events from the DigitalOcean events
# page and save them as a CSV (name, location, date, time zone, URL).
EVENTS_URL = "https://hacktoberfest.digitalocean.com/events"

# Use a timeout so the script cannot hang forever on a stalled connection,
# and fail fast on HTTP errors instead of silently parsing an error page.
response = requests.get(EVENTS_URL, timeout=30)
response.raise_for_status()

# Name the parser explicitly: omitting it triggers a bs4 warning and makes
# the result depend on which parsers happen to be installed.
soup = BeautifulSoup(response.text, "html.parser")

# One list per CSV column; the i-th entry of each list describes event i.
all_names = []
all_locations = []
all_dates = []
all_time_zones = []
all_urls = []

# Each past event is rendered as a <tr class="past"> table row.
# (find_all is the modern spelling; findAll is a deprecated alias.)
for tr_element in soup.find_all("tr", attrs={"class": "past"}):
    # For each row, extract every column value and append it to the
    # matching list.
    name_element = tr_element.find("td", attrs={"class": "event_name"})
    all_names.append(name_element.text.strip())
    location_element = tr_element.find("td", attrs={"class": "location"})
    all_locations.append(location_element.text.strip())
    date_element = tr_element.find("td", attrs={"data-label": "date"})
    all_dates.append(date_element.text.strip())
    time_zone_element = tr_element.find("td", attrs={"data-label": "zone"})
    all_time_zones.append(time_zone_element.text.strip())
    url_element = tr_element.find("a", attrs={"class": "emphasis"})
    # Use a distinct local name: the original bound this to `url`, which
    # shadowed the module-level request URL inside the loop.
    event_url = url_element["href"]
    all_urls.append(event_url)

# Setting up our Comma Separated Values: column header -> column data.
csv_name = "events.csv"
csv_structure = {
    "Name": all_names,
    "Location": all_locations,
    "Date": all_dates,
    "Time Zone": all_time_zones,
    "URL": all_urls,
}

# Creating the CSV (index=False drops the pandas row-index column).
dataFrame = pandas.DataFrame(csv_structure)
dataFrame.to_csv(csv_name, index=False, encoding='utf-8')