Initial commit - no VCS used to this point
k0rnh0li0 committed Jul 7, 2021
0 parents commit f6c6012
Showing 7 changed files with 690 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
downloads/
results.json

auth.priv
339 changes: 339 additions & 0 deletions LICENSE.txt

Large diffs are not rendered by default.

89 changes: 89 additions & 0 deletions README.txt
@@ -0,0 +1,89 @@

======================================
=====         SNOOP DAWG        =====
=====       K0RNH0LI0 2021      =====
=====                           =====
=====  CATCH MOFUCKAS LACKIN!!! =====
======================================


This script scans recent page(s) of the
GitHub public event stream for commits that
match the "interesting" regexes specified in
lists/patterns.txt.

Be sure to send a PR if you have any juicy
regexes to share.

File names/extensions can be excluded from
searching by adding them to lists/blacklist.txt.

For additional help with usage, please see the
APPENDIX section.

SNOOPDAWG is Free Software, licensed under the
terms of the GNU GPLv2. See LICENSE.txt for
more information.


=== SETUP ===

1. Clone this repository.

2. Create a file called auth.priv that contains
your GitHub username on the first line and
your OAuth token on the second line. And then
don't commit it to version control ;)

The OAuth token should have permission to read
public repos.
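
   For example, auth.priv should contain
   exactly two lines (placeholders shown):

       <your-github-username>
       <your-oauth-token>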

3. Run ./snoopdawg.py

4. The script will scan the event stream for your
regexes. Results for matches will be stored in
results.json in the following format:

{
"<FILE HASH>": {
"raw_url": "<URL>",
"match": "<expression that matched>"
},
[...]
}

Files/diffs will be saved to downloads/, and
will be named by their file hash.
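
   A minimal sketch (not part of this repo)
   for going over a finished scan, assuming
   the results.json format above and the
   downloads/ naming scheme:

       import json

       with open("results.json") as f:
           results = json.load(f)

       for sha, hit in results.items():
           print(f"{sha}: {hit['match']!r} -> {hit['raw_url']}")
           # if downloading was enabled, the matched
           # file/diff is saved at downloads/<sha>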

To skip downloading files and only write a
results.json file, start the script with the
flag: --no-dl

By default, the script will only run one scan.
To continue scanning until the script is
interrupted, use the flag: --loop
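
   For example, to keep scanning until
   interrupted without saving any files:

       ./snoopdawg.py --no-dl --loop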

5. Catch em lackin


=== APPENDIX ===

A)
IT'S THE BOW TO THE WOW
CREEPIN AND CRAWLIN
YIGGY YES YALLIN
SNOOP DAWGGY DAWG IN
THE MOTHAFUCKIN HOUSE

B)
IT'S A HACKER BAZAAR
IT'S A MARKETPLACE FOR *.*
EXPLOITS, VULNS, AND CARDS
FULL DUMPS AND ACCOUNTS
AND ARRAYS OF CHARS

C)
CAUSE WHAT YOU SEE YOU MIGHT NOT GET
AND WE CAN BET, SO DON'T YOU GET SOUPED YET
SCHEMING ON A THING, THAT'S A MIRAGE
I'M TRYNA TELL YOU NOW IT'S SABOTAGE
5 changes: 5 additions & 0 deletions cleandl.sh
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# run this to delete all downloaded content

rm -f results.json
rm -rf downloads/
12 changes: 12 additions & 0 deletions lists/blacklist.txt
@@ -0,0 +1,12 @@
# filename/extension blacklist
# lines that begin with # are treated as comments
.xtb
.jpg
.png
.bmp
.mp3
.md
.dll
yarn.lock
package.json
package-lock.json
12 changes: 12 additions & 0 deletions lists/patterns.txt
@@ -0,0 +1,12 @@
# interesting regexes to search commits for
# lines that begin with # are treated as comments
sername
assword
#[13][a-km-zA-HJ-NP-Z1-9]{25,34}
[5KL][1-9A-HJ-NP-Za-km-z]{50,51}
priv_key
privkey
private_key
admin_pass
admin_user
auth_token
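
These entries are plain Python re patterns;
snoopdawg.py feeds each line to re.search(),
which is why truncated words like "sername"
and "assword" match both capitalizations. A
quick illustration (the sample string here
is made up):

    import re

    patterns = ["sername", "assword", "auth_token"]
    sample = 'cfg = {"Username": "admin", "Password": "hunter2"}'

    # same matching rule as check_file() in snoopdawg.py
    for ptn in patterns:
        if re.search(ptn, sample) is not None:
            print(f"MATCH {ptn}")
    # prints: MATCH sername
    #         MATCH assword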
229 changes: 229 additions & 0 deletions snoopdawg.py
@@ -0,0 +1,229 @@
#!/usr/bin/env python3
#
# SNOOPDAWG
# K0RNH0LI0 2021
#
# Searches the GitHub event stream for "interesting" commits.
# See README.txt for more information.
#
# This program is Free Software, licensed under the terms of
# GNU GPLv2. See LICENSE.txt for details.
#

import os
import re
import sys
import time
import json
import requests

# URL of GitHub's event API
EVT_URL = "https://api.github.com/events"

# GitHub username
USERNAME = ""
# GitHub OAuth token
TOKEN = ""

# number of pages to check per scan
PAGES = 1

# whether or not to save files/diffs to downloads/
DOWNLOAD = True
# if True, the script will continue scanning until
# it is interrupted
# if False, the script will scan once
LOOP = False

# re patterns to search commits for
PATTERNS = []
# blacklist of file extensions to not examine
BLACKLIST = []

# dictionary for storing results
# will be written to results.json
RESULTS = {}

# first ID of the previous scan
# current scan will stop if this is reached
PREV_START = ""
# temporary start tracker for each scan
T_START = ""

def api_get(url, params=None):
    resp = requests.get(
        url,
        params=params,
        # Accept is a request header, not a query parameter
        headers={"Accept": "application/vnd.github.v3+json"},
        auth=(USERNAME, TOKEN)
    )

    if resp.status_code != 200:
        print(resp.text)
        return None
    return resp.json()

def get_events(page):
    REQ_PARAMS = {
        "per_page": 100,
        "page": page
    }
    return api_get(EVT_URL, REQ_PARAMS)

def check_file(f):
"""
Search the JSON object representing a file in
a commit for the regex we're interested in.
"""
if f["raw_url"] is None:
return False
# check blacklisted extensions
for ext in BLACKLIST:
if f["raw_url"].endswith(ext):
return False

scantext = ""
if "patch" in f:
scantext = f["patch"]
else:
if os.path.exists("downloads/" + f["sha"]):
print(f["sha"] + " exists, skipping")
return True
scantext = requests.get(f["raw_url"]).text

for ptn in PATTERNS:
if re.search(ptn, scantext) is not None:
print(f"MATCH {f['sha']} {ptn}")
RESULTS[f["sha"]] = {
"raw_url": f["raw_url"],
"match": ptn
}

            # TODO: "stream" to results file instead of
            # rewriting the whole dict every time
with open("results.json", "w") as of:
of.write(json.dumps(RESULTS))

if DOWNLOAD:
# create downloads directory if necessary
if not os.path.exists("downloads/"):
os.mkdir("downloads")
# save file/diff to downloads folder with
# SHA hash for name
with open("downloads/" + f["sha"], "w") as of:
of.write(scantext)
return True
return False

def check_commit(commit_url):
"""
Check all files in a commit based on commit URL.
"""
commit = api_get(commit_url)

    if commit is None:
        return
    if "files" not in commit:
        return

for f in commit["files"]:
check_file(f)

def check_push_event(evt):
"""
Check all commits in a push event based on
a PushEvent JSON object.
"""
if not "payload" in evt:
return
elif not "commits" in evt["payload"]:
return

for commit in evt["payload"]["commits"]:
check_commit(commit["url"])

def scanpage(page, pagenum):
"""
Scan all events in an event page for PushEvents, then
pass them to check_push_event().
page: JSON object representing a page
pagenum: index of this page (1-indexed)
"""
global PREV_START
global T_START

evt = [x for x in page if x["type"] == "PushEvent"]
if len(evt) == 0:
return 0
if pagenum == 1:
T_START = evt[0]["id"]

for x in evt:
if x["id"] == PREV_START:
print("reached prev start")
return None
check_push_event(x)
return 1

def scan_pages(numpages):
"""
Scan the number of pages from the event
stream specified by numpages.
"""
global PREV_START
global T_START
pages = []
# pre-load pages
for i in range(1, numpages + 1):
evt = get_events(i)
if evt is not None:
pages.append(evt)
# scan loaded pages
for i in range(len(pages)):
if scanpage(pages[i], i + 1) is None:
break
PREV_START = T_START

def load_list(filename, dest):
    """
    Load a wordlist from a text file into a list.
    Lines starting with # are comments.
    filename: list to load
    dest: list object to append to
    """
    with open(filename, "r") as f:
        for line in f:
            line = line.rstrip("\n")
            # skip comments and blank lines; a blank line
            # would otherwise become an empty pattern that
            # matches everything
            if line and not line.startswith("#"):
                dest.append(line)

if __name__ == "__main__":
# load OAuth token
    if not os.path.exists("auth.priv"):
        print("auth.priv file not found.")
        print("See README.txt")
        sys.exit(1)
with open("auth.priv", "r") as f:
USERNAME = f.readline().replace("\n", "")
TOKEN = f.readline().replace("\n", "")

# check flags
if "--no-dl" in sys.argv:
DOWNLOAD = False
if "--loop" in sys.argv:
LOOP = True

# load re patterns
load_list("lists/patterns.txt", PATTERNS)
# load file extension blacklist
load_list("lists/blacklist.txt", BLACKLIST)

# load results.json if it exists
if os.path.exists("results.json"):
with open("results.json", "r") as f:
RESULTS = json.loads(f.read())

while True:
scan_pages(PAGES)
if not LOOP:
break
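
For reference, a minimal sketch of the JSON
shapes the scanner relies on (only the keys
the code actually reads are shown; every
value here is hypothetical):

    # one entry from https://api.github.com/events, as
    # filtered by scanpage() (type == "PushEvent")
    event = {
        "id": "17000000000",
        "type": "PushEvent",
        "payload": {
            "commits": [
                {"url": "https://api.github.com/repos/<owner>/<repo>/commits/<sha>"}
            ]
        }
    }

    # commit object fetched by check_commit(); check_file()
    # reads sha, raw_url, and (when present) patch
    commit = {
        "files": [
            {
                "sha": "<blob sha>",
                "raw_url": "https://github.com/<owner>/<repo>/raw/<sha>/<path>",
                "patch": "@@ -0,0 +1 @@\n+admin_pass = ..."
            }
        ]
    }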
