from tqdm import tqdm
import jsbeautifier
import argparse
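+ # New dependency for the -f/--filter verification pass below.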
+ import httpx

pretty_files = []
get_py_filename = os.path.basename(__file__)
@@ -38,14 +39,20 @@ class NewlineFormatter(argparse.RawDescriptionHelpFormatter, argparse.ArgumentDe
parser.add_argument("-s", "--save", help="save prettified js files", action="store_true")
parser.add_argument("-b", "--blacklist", help="blacklist subdomains/domains", nargs="+", default="")
parser.add_argument("-S", "--stdout", help="stdout friendly, displays urls only in stdout", action="store_true")
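+ # New flag: probe each parsed URL over the network and drop apparent 404s.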
+ parser.add_argument("-f", "--filter", help="removes false positives with httpx/requests (use at your own risk)", action="store_true")

group = parser.add_mutually_exclusive_group()
group.add_argument("-m", "--merge", help="create file and merge all urls into it", action="store_true")
group.add_argument("-i", "--isolate", help="create multiple files and store urls where they were parsed from", action="store_true")
args = parser.parse_args()
target_url = args.url
+
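+ # Normalize the target: strip one trailing slash so path joins below don't produce '//'.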
+ if target_url.endswith('/'):
+     target_url = target_url[:-1]
+
intro_logo = f"""\u001b[31m

+
░░░░░██╗░██████╗░░░░░░██████╗░░█████╗░██████╗░░██████╗███████╗
░░░░░██║██╔════╝░░░░░░██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔════╝
░░░░░██║╚█████╗░█████╗██████╔╝███████║██████╔╝╚█████╗░█████╗░░
@@ -147,9 +154,7 @@ def extract_urls(url):
    absolute_urls = re.findall(absolute_pattern, req)
    absolute_urls = [url[1] for url in absolute_urls]
    all_dirs = relative_dirs + absolute_urls
-     unique_dirs = list(dict.fromkeys(all_dirs))
-     unique_dirs.sort()
-     return unique_dirs
+     return all_dirs


def fetch_js(url):
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}
@@ -173,22 +178,26 @@ def move_stored_files():

def write_files():
    remove_dupes()
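+     # Optionally verify URLs before writing them out (-f/--filter).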
+     if args.filter:
+         filter_urls_with_tqdm()
    with open(f"{target}/parsed-urls/all_urls.txt", "w", encoding="utf-8") as directories:
-         for unique_dir in all_dirs:
-             directories.write('')
+         directories.write('')
    with open(f"{target}/parsed-urls/all_urls.txt", "a", encoding="utf-8") as directories:
        for unique_dir in all_dirs:
-             directories.write(unique_dir + '\n')
+             directories.write(clean_urls(unique_dir) + '\n')

def stdout_dirs():
    remove_dupes()
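+     # Only filter when -f is set; use the quiet variant so stdout stays machine-readable.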
+     if args.filter and args.stdout:
+         filter_urls_without_tqdm()
+     elif args.filter:
+         filter_urls_with_tqdm()
    for dir in all_dirs:
-         print(dir)
+         print(clean_urls(dir))
+

def remove_dupes():
-     global all_dirs
-     all_dirs = list(dict.fromkeys(all_dirs))
-     return all_dirs
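+     # Slice-assign so the module-level list is deduped in place (order preserved); no 'global' needed.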
+     all_dirs[:] = list(dict.fromkeys(all_dirs))


def process_files_with_tqdm():
    blacklist = args.blacklist
@@ -218,6 +227,68 @@ def process_files_without_tqdm():
        else:
            store_urls(target_url + js_file)

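+ # Verification pass (quiet): issue a GET and a POST per URL; a path is kept if either
+ # method suggests it exists (GET != 404, or POST answering something other than 404/405).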
+ def filter_urls_without_tqdm():
+     # Iterate over a copy so removals don't skip elements.
+     for dir in all_dirs[:]:
+         try:
+             if dir[:4] == "http":
+                 get_status = httpx.get(dir, follow_redirects=True).status_code
+                 post_status = httpx.post(dir, follow_redirects=True).status_code
+             elif dir[0] != "/":
+                 get_status = httpx.get(target_url + f'/{dir}', follow_redirects=True).status_code
+                 post_status = httpx.post(target_url + f'/{dir}', follow_redirects=True).status_code
+             else:
+                 get_status = httpx.get(target_url + dir, follow_redirects=True).status_code
+                 post_status = httpx.post(target_url + dir, follow_redirects=True).status_code
+
+             # Drop only when GET is 404 and POST adds no evidence (404 or 405).
+             if get_status == 404 and post_status in (404, 405):
+                 all_dirs.remove(dir)
+         except Exception:
+             all_dirs.remove(dir)
+
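+ # Same verification with a tqdm progress bar, echoing each URL with the status that decided it.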
+ def filter_urls_with_tqdm():
+     print('\nVerifying URLs, please wait')
+     custom_bar_format = "\033[32m{desc}\033[0m: [{n}/{total} {percentage:.0f}%] \033[31mTime-Taking:\033[0m [{elapsed}] \033[31mTime-Remaining:\033[0m [{remaining}]"
+     total_items = len(all_dirs)
+     # Iterate over a copy so removals don't disturb the loop.
+     for dir in tqdm(all_dirs[:], desc="Verifying", unit='URL', total=total_items, bar_format=custom_bar_format, position=0, dynamic_ncols=True, leave=True):
+         try:
+             if dir[:4] == "http":
+                 get_status = httpx.get(dir, follow_redirects=True).status_code
+                 post_status = httpx.post(dir, follow_redirects=True).status_code
+             elif dir[0] != "/":
+                 get_status = httpx.get(target_url + f'/{dir}', follow_redirects=True).status_code
+                 post_status = httpx.post(target_url + f'/{dir}', follow_redirects=True).status_code
+             else:
+                 get_status = httpx.get(target_url + dir, follow_redirects=True).status_code
+                 post_status = httpx.post(target_url + dir, follow_redirects=True).status_code
+
+             if get_status == 404 and post_status == 404:
+                 all_dirs.remove(dir)
+                 print(dir + " " * 2 + f"{[get_status]} [GET]", flush=True)
+             elif post_status != 405 and post_status != 404:
+                 print(dir + " " * 2 + f"{[post_status]} [POST]", flush=True)
+             elif get_status != 404:
+                 print(dir + " " * 2 + f"{[get_status]} [GET]", flush=True)
+             else:
+                 print(dir + " " * 2 + f"{[get_status]} [GET]", flush=True)
+                 all_dirs.remove(dir)
+         except Exception:
+             all_dirs.remove(dir)
+
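+ # Prefix bare paths with '/' so every written/printed URL is root-relative or absolute.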
+ def clean_urls(url):
+     if url[:4] == "http":
+         return url
+     elif url[0] != "/":
+         return "/" + url
+     else:
+         return url
+

if __name__ == "__main__":
    if args.stdout:
        pass