-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest1.py
72 lines (64 loc) · 2.31 KB
/
test1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape apartment/duplex for-sale listings from dubizzle.com.eg (pages 1..num_pages)
# and export the extracted fields to a CSV file.
base_url = "https://www.dubizzle.com.eg"
path = "/properties/apartments-duplex-for-sale/?filter=down_payment_between_0_to_1"

# Number of result pages to fetch.
num_pages = 3

# Column name -> (tag name, CSS class) selector for each listing field.
# NOTE(review): 'Bedrooms' and 'Bathrooms' use the identical class "e021be12",
# which also substring-matches the 'Area' selector's tags, so these three
# columns collect overlapping text. Kept exactly as the original selectors --
# confirm the real class names against the live site before relying on them.
selectors = {
    'Price': ("div", "_1075545d _52497c97 _96d4439a"),
    'Location': ("div", "e48cb10f undefined"),
    'Area': ("span", "e021be12 _550213c9"),
    'Time': ("span", "c4ad15ab"),
    'Subtitle': ("div", "_1075545d a8f6df88"),
    'Name': ("div", "a5112ca8 _5fdf4379"),
    'Bedrooms': ("span", "e021be12"),
    'Bathrooms': ("span", "e021be12"),
}

# One list of extracted strings per output column.
data = {column: [] for column in selectors}

for page in range(1, num_pages + 1):
    url = f"{base_url}{path}&page={page}"
    # timeout prevents a hung request from stalling the whole run;
    # raise_for_status fails loudly instead of parsing an HTTP error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract every matching tag's stripped text for each column.
    for column, (tag_name, css_class) in selectors.items():
        for tag in soup.find_all(tag_name, class_=css_class):
            data[column].append(tag.get_text().strip())

# Columns may end up with unequal lengths (selectors match different tag
# counts); orient='index' + transpose pads the short columns with NaN,
# whereas a plain pd.DataFrame(data) would raise on ragged input.
df = pd.DataFrame.from_dict(data, orient='index')
df = df.transpose()

# Save the DataFrame to a CSV file (utf-8-sig so Excel reads Arabic text).
out_file = 'scraped_data0.csv'
df.to_csv(out_file, index=False, encoding='utf-8-sig')
# Bug fix: the original printed "scraped_data.csv" while writing
# "scraped_data0.csv"; report the file actually written.
print(f"Data exported to {out_file} file.")