Keyword update
InitialPosition committed Dec 26, 2019
1 parent 7d06100 commit b3e11ea
Showing 1 changed file with 49 additions and 7 deletions.
scrape.py (+49, -7)
@@ -2,7 +2,15 @@
from datetime import datetime
import requests
from os import path, mkdir
import argparse
from sys import modules

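# the progress package is a third-party dependency; fail with a clear message if it is missing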
try:
    from progress.bar import Bar

except ModuleNotFoundError:
    print("Make sure the progress module is installed.")
    exit(0)


def status(message):
@@ -12,6 +20,7 @@ def status(message):
def main():
status("Fetching latest pastes...")

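    # note: the scraping API only answers requests from IPs whitelisted in a Pastebin PRO account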
    # fetch latest 100 pastes
    current_request = requests.get("https://scrape.pastebin.com/api_scraping.php?limit=100")
    current_json = current_request.json()

@@ -20,36 +29,69 @@ def main():

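    # count pastes already on disk so the progress bar total is accurate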
    for entry in current_json:
        path_t = path.join("files", "{0}.txt".format(entry["key"]))

        if path.isfile(path_t):
            skipped_pastes += 1

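    # fetch each new paste and write it to files/<key>.txt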
    with Bar("Processing", max=len(current_json) - skipped_pastes, fill=">") as bar:
        for entry in current_json:
            path_t = path.join("files", "{0}.txt".format(entry["key"]))
            path_t_important = path.join("files_important", "{0}.txt".format(entry["key"]))

            if path.isfile(path_t):
                continue

            entry_request = requests.get("https://scrape.pastebin.com/api_scrape_item.php?i={0}"
                                         .format(entry["key"]))

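            # write the raw paste body to disk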
            entry_file = open(path_t, "w+")
            entry_file.write(entry_request.text)
            entry_file.close()

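            # optionally scan the paste for keywords and copy matches to files_important/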
            if keywords is not None:
                for keyword in keywords:
                    # upper-case both sides so the match is case-insensitive
                    if keyword.upper() in entry_request.text.upper():
                        print(" [KEYWORD] Paste '{0}' contains keyword '{1}'".format(entry["key"], keyword))

                        entry_file = open(path_t_important, "w+")
                        entry_file.write(entry_request.text)
                        entry_file.close()

                        break

            bar.next()

        bar.finish()

    if skipped_pastes != 0:
        status("Skipped {0} previously fetched pastes".format(skipped_pastes))

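    # wait 60 seconds, then run main() again on a fresh timer thread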
status("Hibernating for 60 seconds...")
print()
threading.Timer(60, main).start()


# make sure file directories exist
if not path.isdir("files"):
    status("No file directory found, creating...")
    mkdir("files")

if not path.isdir("files_important"):
    status("No important file directory found, creating...")
    mkdir("files_important")

# parse arguments
keywords = None

parser = argparse.ArgumentParser(description="A script to scrape pastebin.com with optional keyword search")
parser.add_argument("--keywords", "-k", help="A file containing keywords for the search")
args = parser.parse_args()

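# load the keyword list from the file given with --keywords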
if args.keywords is not None:
    f = open(args.keywords)
    # read().splitlines() drops the trailing newlines that readlines() would leave in each keyword
    keywords = f.read().splitlines()
    f.close()

    status("Loaded {0} keywords".format(len(keywords)))

main()
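
For reference, a typical run, assuming a keywords file named keywords.txt with one keyword per line (the filename here is illustrative):

    python scrape.py --keywords keywords.txt

Without --keywords the script simply archives every new paste into files/.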
