-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path scrape_google.py
45 lines (31 loc) · 1.07 KB
/
scrape_google.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import csv
import os.path
import requests
from bs4 import BeautifulSoup
# CSV column headers shared by parse_and_scrape_data and write_to_csv;
# keeping them as constants ensures the header row and row dicts agree.
FIELD_TITLE = "Title"
FIELD_LINKS = "Links"
def fetch_html(url: str, payload: dict) -> bytes:
    """Fetch *url* with query parameters *payload* and return the raw body.

    Parameters:
        url: Base URL to request.
        payload: Query-string parameters passed through to ``requests.get``.

    Returns:
        The raw response body as bytes.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout window.
    """
    # A timeout is required: without one, requests can block forever on a
    # stalled connection.
    r = requests.get(url, params=payload, timeout=10)
    # Fail loudly on HTTP errors instead of handing an error page to the parser.
    r.raise_for_status()
    print("You're on", r.url)
    return r.content
def parse_and_scrape_data(raw_data) -> None:
    """Parse a Google results page and append each result to the CSV file.

    Each search result is expected inside a ``div`` with class ``g``; the
    title is taken from its first ``<a>`` tag and the displayed link from its
    ``<cite>`` tag.

    Parameters:
        raw_data: HTML document (bytes or str) as returned by ``fetch_html``.
    """
    soup = BeautifulSoup(raw_data, "html.parser")
    for item in soup.find_all("div", {"class": "g"}):
        # Some result blocks (ads, widgets) lack an <a> or <cite> tag;
        # skip them rather than crash with AttributeError on `.text`.
        if item.a is None or item.cite is None:
            continue
        product_details = {
            FIELD_TITLE: item.a.text,
            FIELD_LINKS: item.cite.text,
        }
        write_to_csv(product_details)
def write_to_csv(product_details: dict, filename: str = "googleresults.csv") -> None:
    """Append one result row to *filename*, writing a header row on first use.

    Parameters:
        product_details: Mapping with keys ``FIELD_TITLE`` and ``FIELD_LINKS``.
        filename: Target CSV path; created if it does not yet exist.
    """
    # Only emit the header when the file is being created for the first time.
    file_exists = os.path.isfile(filename)
    # newline="" is required by the csv module to avoid blank lines on Windows;
    # an explicit encoding keeps output consistent across platforms.
    with open(filename, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=(FIELD_TITLE, FIELD_LINKS))
        if not file_exists:
            writer.writeheader()
        writer.writerow(product_details)
if __name__ == "__main__":
    # Script entry point; pipeline wiring (fetch -> parse -> CSV) is still TODO.
    print("Running as a program")
    # TODO: Do something useful