Skip to content

Commit e6c9d4b

Browse files
committed
reformatted code
1 parent b97e0a6 commit e6c9d4b

File tree

1 file changed

+87
-74
lines changed

1 file changed

+87
-74
lines changed
+87-74
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
from tkinter import Tk, Entry, Label, Button, HORIZONTAL
1+
import csv
2+
import threading
3+
import urllib.request
4+
from tkinter import HORIZONTAL, Button, Entry, Label, Tk
25
from tkinter.ttk import Progressbar
3-
from bs4 import BeautifulSoup
46

5-
import urllib.request
6-
import threading
7-
import csv
7+
from bs4 import BeautifulSoup
88

99

1010
class ScrapperLogic:
@@ -21,80 +21,81 @@ def inner_html(element):
2121

2222
@staticmethod
2323
def get_name(body):
24-
return body.find('span', {'class': 'jcn'}).a.string
24+
return body.find("span", {"class": "jcn"}).a.string
2525

2626
@staticmethod
2727
def which_digit(html):
28-
mapping_dict = {'icon-ji': 9,
29-
'icon-dc': '+',
30-
'icon-fe': '(',
31-
'icon-hg': ')',
32-
'icon-ba': '-',
33-
'icon-lk': 8,
34-
'icon-nm': 7,
35-
'icon-po': 6,
36-
'icon-rq': 5,
37-
'icon-ts': 4,
38-
'icon-vu': 3,
39-
'icon-wx': 2,
40-
'icon-yz': 1,
41-
'icon-acb': 0,
42-
}
43-
return mapping_dict.get(html, '')
28+
mapping_dict = {
29+
"icon-ji": 9,
30+
"icon-dc": "+",
31+
"icon-fe": "(",
32+
"icon-hg": ")",
33+
"icon-ba": "-",
34+
"icon-lk": 8,
35+
"icon-nm": 7,
36+
"icon-po": 6,
37+
"icon-rq": 5,
38+
"icon-ts": 4,
39+
"icon-vu": 3,
40+
"icon-wx": 2,
41+
"icon-yz": 1,
42+
"icon-acb": 0,
43+
}
44+
return mapping_dict.get(html, "")
4445

4546
def get_phone_number(self, body):
4647
i = 0
4748
phone_no = "No Number!"
4849
try:
49-
for item in body.find('p', {'class': 'contact-info'}):
50+
for item in body.find("p", {"class": "contact-info"}):
5051
i += 1
5152
if i == 2:
52-
phone_no = ''
53+
phone_no = ""
5354
try:
5455
for element in item.find_all(class_=True):
5556
classes = []
5657
classes.extend(element["class"])
5758
phone_no += str((self.which_digit(classes[1])))
58-
except:
59+
except Exception:
5960
pass
60-
except:
61+
except Exception:
6162
pass
62-
body = body['data-href']
63-
soup = BeautifulSoup(body, 'html.parser')
64-
for a in soup.find_all('a', {"id": "whatsapptriggeer"}):
63+
body = body["data-href"]
64+
soup = BeautifulSoup(body, "html.parser")
65+
for a in soup.find_all("a", {"id": "whatsapptriggeer"}):
6566
# print (a)
66-
phone_no = str(a['href'][-10:])
67+
phone_no = str(a["href"][-10:])
6768

6869
return phone_no
6970

7071
@staticmethod
7172
def get_rating(body):
7273
rating = 0.0
73-
text = body.find('span', {'class': 'star_m'})
74+
text = body.find("span", {"class": "star_m"})
7475
if text is not None:
7576
for item in text:
76-
rating += float(item['class'][0][1:]) / 10
77+
rating += float(item["class"][0][1:]) / 10
7778

7879
return rating
7980

8081
@staticmethod
8182
def get_rating_count(body):
82-
text = body.find('span', {'class': 'rt_count'}).string
83+
text = body.find("span", {"class": "rt_count"}).string
8384

8485
# Get only digits
85-
rating_count = ''.join(i for i in text if i.isdigit())
86-
return rating_count
87-
86+
rating_count = "".join(i for i in text if i.isdigit())
87+
return rating_count
88+
8889
@staticmethod
8990
def get_address(body):
90-
return body.find('span', {'class': 'mrehover'}).text.strip()
91+
return body.find("span", {"class": "mrehover"}).text.strip()
9192

9293
@staticmethod
9394
def get_location(body):
94-
text = body.find('a', {'class': 'rsmap'})
95+
text = body.find("a", {"class": "rsmap"})
9596
if not text:
9697
return
97-
text_list = text['onclick'].split(",")
98+
text_list = text["onclick"].split(",")
9899

99100
latitude = text_list[3].strip().replace("'", "")
100101
longitude = text_list[4].strip().replace("'", "")
@@ -107,44 +108,48 @@ def start_scrapping_logic(self):
107108

108109
total_url = "https://www.justdial.com/{0}/{1}".format(self.location, self.query)
109110

110-
fields = ['Name', 'Phone', 'Rating', 'Rating Count', 'Address', 'Location']
111-
out_file = open('{0}.csv'.format(self.file_name), 'w')
112-
csvwriter = csv.DictWriter(out_file, delimiter=',', fieldnames=fields)
113-
csvwriter.writerow({
114-
'Name': 'Name', #Shows the name
115-
'Phone': 'Phone',#shows the phone
116-
'Rating': 'Rating',#shows the ratings
117-
'Rating Count': 'Rating Count',#Shows the stars for ex: 4 stars
118-
'Address': 'Address',#Shows the address of the place
119-
'Location': 'Location'#shows the location
120-
})
111+
fields = ["Name", "Phone", "Rating", "Rating Count", "Address", "Location"]
112+
out_file = open("{0}.csv".format(self.file_name), "w")
113+
csvwriter = csv.DictWriter(out_file, delimiter=",", fieldnames=fields)
114+
csvwriter.writerow(
115+
{
116+
"Name": "Name", # Shows the name
117+
"Phone": "Phone", # shows the phone
118+
"Rating": "Rating", # shows the ratings
119+
"Rating Count": "Rating Count", # Shows the stars for ex: 4 stars
120+
"Address": "Address", # Shows the address of the place
121+
"Location": "Location", # shows the location
122+
}
123+
)
121124

122125
progress_value = 0
123126
while True:
124127
# Check if reached end of result
125128
if page_number > 50:
126129
progress_value = 100
127-
self.progressbar['value'] = progress_value
130+
self.progressbar["value"] = progress_value
128131
break
129132

130133
if progress_value != 0:
131134
progress_value += 1
132-
self.label_progress['text'] = "{0}{1}".format(progress_value, '%')
133-
self.progressbar['value'] = progress_value
135+
self.label_progress["text"] = "{0}{1}".format(progress_value, "%")
136+
self.progressbar["value"] = progress_value
134137

135138
url = total_url + "/page-%s" % page_number
136139
print("{0} {1}, {2}".format("Scrapping page number: ", page_number, url))
137-
req = urllib.request.Request(url, headers={'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"})
140+
req = urllib.request.Request(
141+
url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"}
142+
)
138143
page = urllib.request.urlopen(req)
139144

140145
soup = BeautifulSoup(page.read(), "html.parser")
141-
services = soup.find_all('li', {'class': 'cntanr'})
146+
services = soup.find_all("li", {"class": "cntanr"})
142147

143148
# Iterate through the 10 results in the page
144149

145150
progress_value += 1
146-
self.label_progress['text'] = "{0}{1}".format(progress_value, '%')
147-
self.progressbar['value'] = progress_value
151+
self.label_progress["text"] = "{0}{1}".format(progress_value, "%")
152+
self.progressbar["value"] = progress_value
148153

149154
for service_html in services:
150155
try:
@@ -158,18 +163,18 @@ def start_scrapping_logic(self):
158163
address = self.get_address(service_html)
159164
location = self.get_location(service_html)
160165
if name is not None:
161-
dict_service['Name'] = name
166+
dict_service["Name"] = name
162167
if phone is not None:
163-
print('getting phone number')
164-
dict_service['Phone'] = phone
168+
print("getting phone number")
169+
dict_service["Phone"] = phone
165170
if rating is not None:
166-
dict_service['Rating'] = rating
171+
dict_service["Rating"] = rating
167172
if count is not None:
168-
dict_service['Rating Count'] = count
173+
dict_service["Rating Count"] = count
169174
if address is not None:
170-
dict_service['Address'] = address
175+
dict_service["Address"] = address
171176
if location is not None:
172-
dict_service['Address'] = location
177+
dict_service["Address"] = location
173178

174179
# Write row to CSV
175180
csvwriter.writerow(dict_service)
@@ -207,42 +212,50 @@ def start_scrapping(self):
207212
query = self.entry_query.get()
208213
location = self.entry_location.get()
209214
file_name = self.entry_file_name.get()
210-
scrapper = ScrapperLogic(query, location, file_name, self.progress, self.label_progress)
215+
scrapper = ScrapperLogic(
216+
query, location, file_name, self.progress, self.label_progress
217+
)
211218
t1 = threading.Thread(target=scrapper.start_scrapping_logic, args=[])
212219
t1.start()
213220

214221
def start(self):
215-
self.label_query = Label(self.master, text='Query')
222+
self.label_query = Label(self.master, text="Query")
216223
self.label_query.grid(row=0, column=0)
217224

218225
self.entry_query = Entry(self.master, width=23)
219226
self.entry_query.grid(row=0, column=1)
220227

221-
self.label_location = Label(self.master, text='Location')
228+
self.label_location = Label(self.master, text="Location")
222229
self.label_location.grid(row=1, column=0)
223230

224231
self.entry_location = Entry(self.master, width=23)
225232
self.entry_location.grid(row=1, column=1)
226233

227-
self.label_file_name = Label(self.master, text='File Name')
234+
self.label_file_name = Label(self.master, text="File Name")
228235
self.label_file_name.grid(row=2, column=0)
229236

230237
self.entry_file_name = Entry(self.master, width=23)
231238
self.entry_file_name.grid(row=2, column=1)
232239

233-
self.label_progress = Label(self.master, text='0%')
240+
self.label_progress = Label(self.master, text="0%")
234241
self.label_progress.grid(row=3, column=0)
235242

236-
self.button_start = Button(self.master, text="Start", command=self.start_scrapping)
243+
self.button_start = Button(
244+
self.master, text="Start", command=self.start_scrapping
245+
)
237246
self.button_start.grid(row=3, column=1)
238247

239-
self.progress = Progressbar(self.master, orient=HORIZONTAL, length=350, mode='determinate')
248+
self.progress = Progressbar(
249+
self.master, orient=HORIZONTAL, length=350, mode="determinate"
250+
)
240251
self.progress.grid(row=4, columnspan=2)
241-
#Above is the progress bar
242252

243-
if __name__ == '__main__':
253+
# Above is the progress bar
254+
255+
256+
if __name__ == "__main__":
244257
root = Tk()
245-
root.geometry('350x130+600+100')
258+
root.geometry("350x130+600+100")
246259
root.title("Just Dial Scrapper - Cool")
247260
JDScrapperGUI(root).start()
248261
root.mainloop()

0 commit comments

Comments (0)