# Standard library imports, grouped per PEP 8 (stdlib first, third-party after).
from datetime import datetime
from os import path, mkdir
from sys import modules
import argparse

import requests

# `progress` is a third-party package; fail with a friendly message (and a
# non-zero exit code) if it is missing instead of a raw ImportError traceback.
try:
    from progress.bar import Bar
except ModuleNotFoundError:
    print("Make sure the progress module is installed.")
    # was: exit(0) — exiting 0 signals success; a missing dependency is a failure
    exit(1)
def status (message ):
@@ -12,6 +20,7 @@ def status(message):
12
20
def main():
    """Fetch the latest public pastes from pastebin.com and archive new ones.

    Each unseen paste is written to files/<key>.txt. If the module-level
    ``keywords`` list is set, any paste containing a keyword is also copied to
    files_important/<key>.txt. Reschedules itself every 60 seconds via
    threading.Timer. No parameters, no return value.
    """
    status("Fetching latest pastes...")

    # fetch latest 100 pastes
    current_request = requests.get("https://scrape.pastebin.com/api_scraping.php?limit=100")
    current_json = current_request.json()

    # Count pastes already on disk so the progress bar only spans new work.
    skipped_pastes = 0
    for entry in current_json:
        path_t = path.join("files", "{0}.txt".format(entry["key"]))

        if path.isfile(path_t):
            skipped_pastes += 1

    with Bar("Processing", max=len(current_json) - skipped_pastes, fill=">") as bar:
        for entry in current_json:
            path_t = path.join("files", "{0}.txt".format(entry["key"]))
            path_t_important = path.join("files_important", "{0}.txt".format(entry["key"]))

            # Already fetched on a previous run.
            if path.isfile(path_t):
                continue

            entry_request = requests.get("https://scrape.pastebin.com/api_scrape_item.php?i={0}"
                                         .format(entry["key"]))
            paste_text = entry_request.text

            # was: open/write/close without a context manager — leaked the
            # handle if the write raised
            with open(path_t, "w") as entry_file:
                entry_file.write(paste_text)

            if keywords is not None:
                # was: keyword.upper() in paste_text — only matched pastes that
                # happened to contain the keyword in ALL CAPS. Uppercase both
                # sides for a case-insensitive search; strip() guards against
                # trailing newlines left by readlines() on the keyword list.
                paste_text_upper = paste_text.upper()
                for keyword in keywords:
                    if keyword.strip().upper() in paste_text_upper:
                        print(" [KEYWORD] Paste '{0}' contains keyword '{1}'".format(entry["key"], keyword))

                        with open(path_t_important, "w") as important_file:
                            important_file.write(paste_text)

                        # One important copy per paste is enough — stop at the
                        # first matching keyword.
                        break

            bar.next()

        bar.finish()

    # was: `skipped_pastes is not 0` — identity comparison on an int (works
    # only by CPython's small-int caching and warns on modern Python)
    if skipped_pastes != 0:
        status("Skipped {0} previously fetched pastes".format(skipped_pastes))

    status("Hibernating for 60 seconds...")
    print()
    threading.Timer(60, main).start()
49
72
50
73
74
# make sure file directories exist before main() tries to write into them
if not path.isdir("files"):
    status("No file directory found, creating...")
    mkdir("files")

if not path.isdir("files_important"):
    status("No important file directory found, creating...")
    mkdir("files_important")

# parse arguments; keywords stays None (read by main()) when -k is not given
keywords = None

parser = argparse.ArgumentParser(description="A script to scrape pastebin.com with optional keyword search")
parser.add_argument("--keywords", "-k", help="A file containing keywords for the search")
args = parser.parse_args()

if args.keywords is not None:
    # was: keywords = f.readlines() — left a trailing "\n" on every keyword,
    # so substring matches against paste text almost never fired, and a blank
    # line became "" which is contained in EVERY paste (flagging everything).
    # strip() each line and drop empties; use a context manager for the file.
    with open(args.keywords) as keyword_file:
        keywords = [line.strip() for line in keyword_file if line.strip()]

    status("Loaded {0} keywords".format(len(keywords)))

main()
0 commit comments