Skip to content

Commit ea74d9f

Browse files
committed
add optimization, reduce overhead, and add a false-positive filtering arg
1 parent c462676 commit ea74d9f

File tree

1 file changed

+81
-10
lines changed

1 file changed

+81
-10
lines changed

js-parse.py

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from tqdm import tqdm
55
import jsbeautifier
66
import argparse
7+
import httpx
78

89
pretty_files = []
910
get_py_filename = os.path.basename(__file__)
@@ -38,14 +39,20 @@ class NewlineFormatter(argparse.RawDescriptionHelpFormatter, argparse.ArgumentDe
3839
parser.add_argument("-s", "--save", help="save prettified js files", action="store_true")
3940
parser.add_argument("-b", "--blacklist", help="blacklist subdomains/domains", nargs="+", default="")
4041
parser.add_argument("-S", "--stdout", help="stdout friendly, displays urls only in stdout", action="store_true")
42+
parser.add_argument("-f", "--filter", help="removes false positives with httpx/requests (use at your own risk)", action="store_true")
4143

4244
group = parser.add_mutually_exclusive_group()
4345
group.add_argument("-m", "--merge", help="create file and merge all urls into it", action="store_true")
4446
group.add_argument("-i", "--isolate", help="create multiple files and store urls where they were parsed from", action="store_true")
4547
args = parser.parse_args()
target_url = args.url

# Normalize the target: drop a single trailing slash so later
# concatenations like `target_url + js_file` do not produce "//".
# str.endswith is safe on an empty string, unlike indexing the last char.
if target_url.endswith('/'):
    target_url = target_url[:-1]
4753
intro_logo = f"""\u001b[31m
4854
55+
4956
░░░░░██╗░██████╗░░░░░░██████╗░░█████╗░██████╗░░██████╗███████╗
5057
░░░░░██║██╔════╝░░░░░░██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔════╝
5158
░░░░░██║╚█████╗░█████╗██████╔╝███████║██████╔╝╚█████╗░█████╗░░
@@ -147,9 +154,7 @@ def extract_urls(url):
147154
absolute_urls = re.findall(absolute_pattern, req)
148155
absolute_urls = [url[1] for url in absolute_urls]
149156
all_dirs = relative_dirs + absolute_urls
150-
unique_dirs = list(dict.fromkeys(all_dirs))
151-
unique_dirs.sort()
152-
return unique_dirs
157+
return all_dirs
153158

154159
def fetch_js(url):
155160
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'}
@@ -173,22 +178,26 @@ def move_stored_files():
173178

174179
def write_files():
    """Write every collected URL (deduplicated, optionally filtered) to
    <target>/parsed-urls/all_urls.txt, one normalized URL per line.

    Reads the module globals ``all_dirs``, ``args`` and ``target``.
    """
    remove_dupes()
    if args.filter:
        filter_urls_with_tqdm()
    # A single "w" open truncates and writes in one pass; the original
    # opened the file twice (once with "w" writing '' just to truncate,
    # then again with "a" to append the URLs).
    with open(f"{target}/parsed-urls/all_urls.txt", "w", encoding="utf-8") as directories:
        for unique_dir in all_dirs:
            directories.write(clean_urls(unique_dir) + '\n')
182188

183189
def stdout_dirs():
    """Print every collected URL to stdout, one normalized URL per line.

    Reads the module globals ``all_dirs`` and ``args``.
    """
    remove_dupes()
    # Bug fix: the original's else-branch called filter_urls_with_tqdm()
    # even when -f/--filter was NOT given, so the (slow, network-hitting)
    # filtering pass always ran and the tqdm progress bar polluted
    # -S/--stdout output. Filtering is now opt-in, and the quiet variant
    # is used since this function is the stdout-friendly path.
    if args.filter:
        filter_urls_without_tqdm()
    for found_url in all_dirs:
        print(clean_urls(found_url))
197+
187198

188199
def remove_dupes():
    """Deduplicate the module-global ``all_dirs`` in place.

    First-seen order is preserved (dict.fromkeys keeps insertion order).
    Slice assignment mutates the existing list object, so any other
    references to ``all_dirs`` keep seeing the deduplicated contents.
    """
    deduped = dict.fromkeys(all_dirs)
    all_dirs[:] = deduped
192201

193202
def process_files_with_tqdm():
194203
blacklist = args.blacklist
@@ -218,6 +227,68 @@ def process_files_without_tqdm():
218227
else:
219228
store_urls(target_url + js_file)
220229

230+
def filter_urls_without_tqdm():
    """Silently remove probable false positives from the global ``all_dirs``.

    For each candidate a GET and a POST request are issued (redirects
    followed); a URL is dropped when GET returns 404 and POST returns
    404 or 405 (405 = method not allowed, i.e. POST adds no information).
    URLs that fail to resolve at all are dropped as well (best-effort,
    matching the original behavior). Reads the module globals
    ``all_dirs``, ``args`` and ``httpx``.
    """
    # Iterate over a copy because we remove items from all_dirs.
    for candidate in all_dirs[:]:
        # Build the absolute URL once instead of duplicating the request
        # calls per branch. startswith() is safe on empty strings, where
        # the original's candidate[0] would raise IndexError.
        if candidate.startswith("http"):
            full_url = candidate
        elif not candidate.startswith("/"):
            full_url = f"{args.url}/{candidate}"
        else:
            full_url = args.url + candidate

        try:
            get_status = httpx.get(full_url, follow_redirects=True).status_code
            post_status = httpx.post(full_url, follow_redirects=True).status_code
        except Exception:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit). Unreachable URLs are treated
            # as false positives, as before.
            all_dirs.remove(candidate)
            continue

        # The original's four-branch if/elif chain reduces to exactly this
        # removal condition.
        if get_status == 404 and post_status in (404, 405):
            all_dirs.remove(candidate)
253+
254+
def filter_urls_with_tqdm():
    """Remove probable false positives from ``all_dirs`` with a progress bar.

    Same probing logic as filter_urls_without_tqdm() — GET + POST per
    candidate, drop on GET 404 with POST 404/405 — but every probed URL
    and its status is printed, and progress is shown via tqdm. Reads the
    module globals ``all_dirs``, ``args``, ``httpx`` and ``tqdm``.
    """
    print('\nVerifying URLS, please wait')
    custom_bar_format = "\033[32m{desc}\033[0m: [{n}/{total} {percentage:.0f}%] \033[31mTime-Taking:\033[0m [{elapsed}] \033[31mTime-Remaining:\033[0m [{remaining}] "
    total_items = len(all_dirs)
    # Iterate over a copy because we remove items from all_dirs.
    for candidate in tqdm(all_dirs[:], desc="Verifying", unit='URL', total=total_items, bar_format=custom_bar_format, position=0, dynamic_ncols=True, leave=True):
        # Build the absolute URL once instead of duplicating the request
        # calls per branch; startswith() is safe on empty strings.
        if candidate.startswith("http"):
            full_url = candidate
        elif not candidate.startswith("/"):
            full_url = f"{args.url}/{candidate}"
        else:
            full_url = args.url + candidate

        try:
            get_status = httpx.get(full_url, follow_redirects=True).status_code
            post_status = httpx.post(full_url, follow_redirects=True).status_code
        except Exception:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt, making the progress loop un-interruptible).
            all_dirs.remove(candidate)
            continue

        # Printed output is byte-identical to the original's
        # `f"{ [status]}"` list-literal formatting, e.g. "url  [404] [GET]".
        if get_status == 404 and post_status in (404, 405):
            print(f"{candidate}  [{get_status}] [GET]", flush=True)
            all_dirs.remove(candidate)
        elif post_status not in (404, 405):
            print(f"{candidate}  [{post_status}] [POST]", flush=True)
        else:
            print(f"{candidate}  [{get_status}] [GET]", flush=True)
282+
283+
def clean_urls(url):
    """Normalize a parsed URL for output.

    Absolute URLs (anything starting with "http") are returned untouched;
    relative paths gain a leading "/"; already root-relative paths pass
    through unchanged.

    Args:
        url: URL or path string extracted from a JS file.

    Returns:
        The normalized URL string.
    """
    if url.startswith("http"):
        return url
    # Guard: the original indexed url[0] and raised IndexError on an
    # empty string; an empty candidate is returned as-is instead.
    if url and not url.startswith("/"):
        return "/" + url
    return url
291+
221292
if __name__ == "__main__":
222293
if (args.stdout):
223294
pass

0 commit comments

Comments
 (0)