1
- from tkinter import Tk , Entry , Label , Button , HORIZONTAL
1
+ import csv
2
+ import threading
3
+ import urllib .request
4
+ from tkinter import HORIZONTAL , Button , Entry , Label , Tk
2
5
from tkinter .ttk import Progressbar
3
- from bs4 import BeautifulSoup
4
6
5
- import urllib .request
6
- import threading
7
- import csv
7
+ from bs4 import BeautifulSoup
8
8
9
9
10
10
class ScrapperLogic :
@@ -21,80 +21,81 @@ def inner_html(element):
21
21
22
22
@staticmethod
23
23
def get_name (body ):
24
- return body .find (' span' , {' class' : ' jcn' }).a .string
24
+ return body .find (" span" , {" class" : " jcn" }).a .string
25
25
26
26
@staticmethod
27
27
def which_digit (html ):
28
- mapping_dict = {'icon-ji' : 9 ,
29
- 'icon-dc' : '+' ,
30
- 'icon-fe' : '(' ,
31
- 'icon-hg' : ')' ,
32
- 'icon-ba' : '-' ,
33
- 'icon-lk' : 8 ,
34
- 'icon-nm' : 7 ,
35
- 'icon-po' : 6 ,
36
- 'icon-rq' : 5 ,
37
- 'icon-ts' : 4 ,
38
- 'icon-vu' : 3 ,
39
- 'icon-wx' : 2 ,
40
- 'icon-yz' : 1 ,
41
- 'icon-acb' : 0 ,
42
- }
43
- return mapping_dict .get (html , '' )
28
+ mapping_dict = {
29
+ "icon-ji" : 9 ,
30
+ "icon-dc" : "+" ,
31
+ "icon-fe" : "(" ,
32
+ "icon-hg" : ")" ,
33
+ "icon-ba" : "-" ,
34
+ "icon-lk" : 8 ,
35
+ "icon-nm" : 7 ,
36
+ "icon-po" : 6 ,
37
+ "icon-rq" : 5 ,
38
+ "icon-ts" : 4 ,
39
+ "icon-vu" : 3 ,
40
+ "icon-wx" : 2 ,
41
+ "icon-yz" : 1 ,
42
+ "icon-acb" : 0 ,
43
+ }
44
+ return mapping_dict .get (html , "" )
44
45
45
46
def get_phone_number(self, body):
    """Extract a phone number for one listing, or ``"No Number!"``.

    Two sources are tried in order:

    1. The second child of the ``p.contact-info`` element: every descendant
       that has a ``class`` attribute contributes one character, decoded
       from its second class name via ``which_digit``. Any exception in
       this step is swallowed (best effort).
    2. ``body["data-href"]`` is parsed as HTML; for every anchor with id
       ``whatsapptriggeer`` the last 10 characters of its ``href`` replace
       whatever step 1 produced (the last matching anchor wins).

    Note that once the second child is reached the result is reset to an
    empty string, so it may come back empty rather than ``"No Number!"``.
    """
    number = "No Number!"
    try:
        contact_info = body.find("p", {"class": "contact-info"})
        for position, child in enumerate(contact_info, start=1):
            # Only the second child carries the icon-encoded digits.
            if position != 2:
                continue
            number = ""
            try:
                for tag in child.find_all(class_=True):
                    class_names = list(tag["class"])
                    number += str(self.which_digit(class_names[1]))
            except Exception:
                # Best effort: keep whatever was decoded so far.
                pass
    except Exception:
        # Best effort: fall through to the data-href lookup below.
        pass

    embedded_markup = body["data-href"]
    parsed = BeautifulSoup(embedded_markup, "html.parser")
    for anchor in parsed.find_all("a", {"id": "whatsapptriggeer"}):
        number = str(anchor["href"][-10:])

    return number
69
70
70
71
@staticmethod
71
72
def get_rating (body ):
72
73
rating = 0.0
73
- text = body .find (' span' , {' class' : ' star_m' })
74
+ text = body .find (" span" , {" class" : " star_m" })
74
75
if text is not None :
75
76
for item in text :
76
- rating += float (item [' class' ][0 ][1 :]) / 10
77
+ rating += float (item [" class" ][0 ][1 :]) / 10
77
78
78
79
return rating
79
80
80
81
@staticmethod
81
82
def get_rating_count (body ):
82
- text = body .find (' span' , {' class' : ' rt_count' }).string
83
+ text = body .find (" span" , {" class" : " rt_count" }).string
83
84
84
85
# Get only digits
85
- rating_count = '' .join (i for i in text if i .isdigit ())
86
- return rating_count
87
-
86
+ rating_count = "" .join (i for i in text if i .isdigit ())
87
+ return rating_count
88
+
88
89
@staticmethod
89
90
def get_address (body ):
90
- return body .find (' span' , {' class' : ' mrehover' }).text .strip ()
91
+ return body .find (" span" , {" class" : " mrehover" }).text .strip ()
91
92
92
93
@staticmethod
93
94
def get_location (body ):
94
- text = body .find ('a' , {' class' : ' rsmap' })
95
+ text = body .find ("a" , {" class" : " rsmap" })
95
96
if not text :
96
97
return
97
- text_list = text [' onclick' ].split ("," )
98
+ text_list = text [" onclick" ].split ("," )
98
99
99
100
latitude = text_list [3 ].strip ().replace ("'" , "" )
100
101
longitude = text_list [4 ].strip ().replace ("'" , "" )
@@ -107,44 +108,48 @@ def start_scrapping_logic(self):
107
108
108
109
total_url = "https://www.justdial.com/{0}/{1}" .format (self .location , self .query )
109
110
110
- fields = ['Name' , 'Phone' , 'Rating' , 'Rating Count' , 'Address' , 'Location' ]
111
- out_file = open ('{0}.csv' .format (self .file_name ), 'w' )
112
- csvwriter = csv .DictWriter (out_file , delimiter = ',' , fieldnames = fields )
113
- csvwriter .writerow ({
114
- 'Name' : 'Name' , #Shows the name
115
- 'Phone' : 'Phone' ,#shows the phone
116
- 'Rating' : 'Rating' ,#shows the ratings
117
- 'Rating Count' : 'Rating Count' ,#Shows the stars for ex: 4 stars
118
- 'Address' : 'Address' ,#Shows the address of the place
119
- 'Location' : 'Location' #shows the location
120
- })
111
+ fields = ["Name" , "Phone" , "Rating" , "Rating Count" , "Address" , "Location" ]
112
+ out_file = open ("{0}.csv" .format (self .file_name ), "w" )
113
+ csvwriter = csv .DictWriter (out_file , delimiter = "," , fieldnames = fields )
114
+ csvwriter .writerow (
115
+ {
116
+ "Name" : "Name" , # Shows the name
117
+ "Phone" : "Phone" , # shows the phone
118
+ "Rating" : "Rating" , # shows the ratings
119
+ "Rating Count" : "Rating Count" , # Shows the stars for ex: 4 stars
120
+ "Address" : "Address" , # Shows the address of the place
121
+ "Location" : "Location" , # shows the location
122
+ }
123
+ )
121
124
122
125
progress_value = 0
123
126
while True :
124
127
# Check if reached end of result
125
128
if page_number > 50 :
126
129
progress_value = 100
127
- self .progressbar [' value' ] = progress_value
130
+ self .progressbar [" value" ] = progress_value
128
131
break
129
132
130
133
if progress_value != 0 :
131
134
progress_value += 1
132
- self .label_progress [' text' ] = "{0}{1}" .format (progress_value , '%' )
133
- self .progressbar [' value' ] = progress_value
135
+ self .label_progress [" text" ] = "{0}{1}" .format (progress_value , "%" )
136
+ self .progressbar [" value" ] = progress_value
134
137
135
138
url = total_url + "/page-%s" % page_number
136
139
print ("{0} {1}, {2}" .format ("Scrapping page number: " , page_number , url ))
137
- req = urllib .request .Request (url , headers = {'User-Agent' : "Mozilla/5.0 (Windows NT 6.1; Win64; x64)" })
140
+ req = urllib .request .Request (
141
+ url , headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 6.1; Win64; x64)" }
142
+ )
138
143
page = urllib .request .urlopen (req )
139
144
140
145
soup = BeautifulSoup (page .read (), "html.parser" )
141
- services = soup .find_all ('li' , {' class' : ' cntanr' })
146
+ services = soup .find_all ("li" , {" class" : " cntanr" })
142
147
143
148
# Iterate through the 10 results in the page
144
149
145
150
progress_value += 1
146
- self .label_progress [' text' ] = "{0}{1}" .format (progress_value , '%' )
147
- self .progressbar [' value' ] = progress_value
151
+ self .label_progress [" text" ] = "{0}{1}" .format (progress_value , "%" )
152
+ self .progressbar [" value" ] = progress_value
148
153
149
154
for service_html in services :
150
155
try :
@@ -158,18 +163,18 @@ def start_scrapping_logic(self):
158
163
address = self .get_address (service_html )
159
164
location = self .get_location (service_html )
160
165
if name is not None :
161
- dict_service [' Name' ] = name
166
+ dict_service [" Name" ] = name
162
167
if phone is not None :
163
- print (' getting phone number' )
164
- dict_service [' Phone' ] = phone
168
+ print (" getting phone number" )
169
+ dict_service [" Phone" ] = phone
165
170
if rating is not None :
166
- dict_service [' Rating' ] = rating
171
+ dict_service [" Rating" ] = rating
167
172
if count is not None :
168
- dict_service [' Rating Count' ] = count
173
+ dict_service [" Rating Count" ] = count
169
174
if address is not None :
170
- dict_service [' Address' ] = address
175
+ dict_service [" Address" ] = address
171
176
if location is not None:
    # Store the coordinates in the "Location" column declared in the CSV
    # field list. Writing them under "Address" overwrote the street
    # address assigned just above and left the "Location" column empty.
    dict_service["Location"] = location
173
178
174
179
# Write row to CSV
175
180
csvwriter .writerow (dict_service )
@@ -207,42 +212,50 @@ def start_scrapping(self):
207
212
query = self .entry_query .get ()
208
213
location = self .entry_location .get ()
209
214
file_name = self .entry_file_name .get ()
210
- scrapper = ScrapperLogic (query , location , file_name , self .progress , self .label_progress )
215
+ scrapper = ScrapperLogic (
216
+ query , location , file_name , self .progress , self .label_progress
217
+ )
211
218
t1 = threading .Thread (target = scrapper .start_scrapping_logic , args = [])
212
219
t1 .start ()
213
220
214
221
def start(self):
    """Create the window's widgets and place them on the grid.

    Rows 0-2 each hold a caption and a text entry (query, location, output
    file name); row 3 holds the progress label and the Start button; row 4
    holds the progress bar spanning both columns. Every widget is kept on
    ``self`` under the names ``label_*``, ``entry_*``, ``button_start`` and
    ``progress``.
    """
    input_rows = (
        ("query", "Query"),
        ("location", "Location"),
        ("file_name", "File Name"),
    )
    for row, (suffix, caption) in enumerate(input_rows):
        caption_label = Label(self.master, text=caption)
        caption_label.grid(row=row, column=0)
        setattr(self, "label_" + suffix, caption_label)

        text_entry = Entry(self.master, width=23)
        text_entry.grid(row=row, column=1)
        setattr(self, "entry_" + suffix, text_entry)

    self.label_progress = Label(self.master, text="0%")
    self.label_progress.grid(row=3, column=0)

    self.button_start = Button(
        self.master, text="Start", command=self.start_scrapping
    )
    self.button_start.grid(row=3, column=1)

    self.progress = Progressbar(
        self.master, orient=HORIZONTAL, length=350, mode="determinate"
    )
    self.progress.grid(row=4, columnspan=2)
    # Above is the progress bar
254
+
255
+
256
if __name__ == "__main__":
    # Build the root window, attach the scraper GUI to it, then hand
    # control to the Tk event loop.
    root = Tk()
    root.geometry("350x130+600+100")
    root.title("Just Dial Scrapper - Cool")
    app = JDScrapperGUI(root)
    app.start()
    root.mainloop()
0 commit comments