Skip to content


hello world
Browse files Browse the repository at this point in the history
  • Loading branch information
Stefan Wieczorek committed Feb 7, 2022
1 parent 7ce6df3 commit 2b8b6eb
Show file tree
Hide file tree
Showing 15 changed files with 856 additions and 0 deletions.
42 changes: 42 additions & 0 deletions config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# In all sections, tld definitions can either be explicit tlds or one of the supplied tld lists
# These lists are: all_tlds, all_tlds_incl_slds, abused, top5, top15

# TLDs = abused

# TLDs = abused

# TLDs = abused

# TLDs = abused

# TLDs = abused

# TLDs = top5

# Which wikipedia articles should be used to generate the wordlist
# Terms = cyber computer hacker malware software tiger

# How many of these related terms should be scanned
# Count = 750

# Which wikipedia language version should be used
# Language = en

# TLDs = abused

# Additional wordlists - please specify complete path
# Wordlists = /home/fant/wordlist/animals.txt

25 changes: 25 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env python3

import argparse
import sqlite3

# Command-line tool that dumps every row of the `domains` table of a squatting
# database, optionally hiding rows whose master name contains a given substring.
parser = argparse.ArgumentParser(description="Dump the domainsquatting database")
parser.add_argument("dbfile", type=str, help="Squatting database to load")
parser.add_argument("--filter", nargs="+", type=str, help="Filter master names containing these strings")
args = parser.parse_args()

# `--filter` is optional; normalise "not given" (None) to an empty list so the
# loop below needs no special case.
filter_terms = args.filter or []

# Setup the database
con = sqlite3.connect(args.dbfile)
try:
    for row in con.execute("SELECT * FROM domains"):
        # Column 2 is the SOA master name (table layout: host, tld, master, ...).
        master = row[2]
        if any(term in master for term in filter_terms):
            continue  # Row matches one of the filter strings: hide it

        # NOTE(review): the statement under `if not filtered:` was missing in the
        # reviewed text; printing the row is assumed from the tool's description
        # ("Dump the domainsquatting database") - confirm against the repository.
        print(row)
finally:
    # Release the database handle even if the query or printing fails.
    con.close()

309 changes: 309 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
#!/usr/bin/env python3

import dns.resolver
import string
import time
import copy
import sys
import argparse
import configparser
import sqlite3
import queue
import logging
import threading

from utils.parser_checks import parser_check_rate, parser_check_threads
from utils.squat_generator import generate_char_simple, generate_homoglyphs, generate_numbers
from utils.wikipedia_wordlist import generate_wikipedia_wordlist
from utils.tld_generator import TLDGenerator
from utils.utils import dedup

# Character set made of a-z, 0-9, the hyphen and the German umlauts.
# NOTE(review): URL_CHARS is not referenced in the visible part of this file - confirm it is still needed.
URL_CHARS = list(string.ascii_lowercase) + list(string.digits) + ["-", "ä", "ö", "ü"]
# Timestamp taken once at import; WatchThread uses it for its runtime and progress estimates.
START_TIME = time.time()

# Command-line interface. Every scanning technique is an opt-in boolean flag;
# `--all` is checked alongside most technique flags further down in the script.
parser = argparse.ArgumentParser(description="Search for possible squatting domains")
parser.add_argument("scanword", type=str, help="Which domain name / word to scan (without the TLD)")
parser.add_argument("--config", type=str, default="config.ini", help="Config file to use")
parser.add_argument("--all", default=False, action='store_true', help="Execute all scanning techniques")
parser.add_argument("--tlds", default=False, action='store_true', help="Scan all tlds")
parser.add_argument("--slds", default=False, action='store_true', help="Scan all tlds and known slds")
parser.add_argument("--homo", default=False, action='store_true', help="Scan homoglyphs")
parser.add_argument("--chars", default=False, action='store_true', help="Scan character replacements and additions")
parser.add_argument("--numbers", default=False, action='store_true', help="Iterate numbers in the domain name")
parser.add_argument("--phishing", default=False, action='store_true', help="Scan phishing wordlist")
parser.add_argument("--ccodes", default=False, action='store_true', help="Scan two-letter country codes")
parser.add_argument("--wiki", default=False, action='store_true', help="Scan Wikipedia generated related word lists")
# One or more terms; when given they are used instead of the `Terms` value of the WIKI config section.
parser.add_argument("--wikiterms", type=str, default=None, nargs="+", help="Wikipedia terms to scan instead of terms from config.ini")
parser.add_argument("--wordlist", default=False, action='store_true', help="Scan wordlists defined in config file")
# Both values below are handed to TLDGenerator (forcedtlds= / tldfile=).
parser.add_argument("--forcetlds", type=str, default=None, nargs="+", help="Override scan tlds set in the config.ini file")
# NOTE(review): the help text reads "a fresh copy from, use this" - a word seems to be missing after "from".
parser.add_argument("--tldfile", type=str, default=None, nargs="?", help="Instead of downloading a fresh copy from, use this as a list of all tlds and slds")
# The `type=` callables come from utils.parser_checks; presumably they validate the numeric range - verify there.
parser.add_argument("--threads", type=parser_check_threads, default=5, help="Number of scanthreads to start")
parser.add_argument("--rate", type=parser_check_rate, default=10, help="Scans per second to aim for")

# Parsing happens at import time, so importing this module requires a valid command line.
args = parser.parse_args()

# Scan settings (per-technique TLD lists, wordlist paths, Wikipedia options).
config = configparser.ConfigParser()
# Actually load the file named by --config; without this the parser stays empty
# and every later `config["SECTION"]` lookup raises KeyError.
# read() silently skips files it cannot open, so a missing file behaves like an
# empty configuration.
config.read(args.config)

# Lower-cased word to scan; also the file name stem of the SQLite database below.
SCANWORD = args.scanword.lower()
# --- State shared between the main script, the scan threads and the watch thread ---
# Number of lookups started so far (ScanThread.scan_tlds increments it once per TLD).
glob_scancounter = 0
# Number of hits recorded so far (incremented in ScanThread.scan_tlds after a domain is noted).
glob_found_domains = 0
# Delay value that WatchThread rescales and clamps to the range 0.1 - 20.
glob_scan_delay = 1.0
# Work queue: scan_host() puts (host, tlds) tuples, the shutdown code puts "STOP" markers.
glob_scanpool = queue.SimpleQueue()
# Maps each host that has been queued to the list of TLDs queued for it.
glob_known_hosts = {}

# Create the result table on first use. The database file is named after the scan word
# and is created in the current working directory.
con = sqlite3.connect(f"{SCANWORD}.db")
cur = con.cursor()
cur.execute("CREATE TABLE IF NOT EXISTS domains (host text, tld text, master text, first_seen text, last_seen text, accepts_anyhost bool)")

# Setup logging: timestamped messages, INFO and above (logging.debug calls stay silent).
logging.basicConfig(format="%(asctime)s %(message)s", level=logging.INFO)

def load_wordlist_file(filename):
    """Read a wordlist file and return its lower-cased, de-duplicated words.

    Every line is lower-cased and split on whitespace, so a line may contribute
    several words. The collected words are passed through ``dedup`` (from
    ``utils.utils``) and its result is returned unchanged.
    """
    with open(filename, "r") as handle:
        tokens = [
            token
            for raw_line in handle
            for token in raw_line.lower().split()
        ]

    return dedup(tokens)

def scan_host(host, tlds):
    """Queue ``host`` for scanning on ``tlds``, skipping TLDs already queued for it.

    ``glob_known_hosts`` remembers, per host, every TLD that has been put on
    ``glob_scanpool``. For a new host the whole list is queued; for a known host
    only the TLDs not yet recorded are queued and then added to the record.

    Args:
        host: Host name without TLD.
        tlds: List of TLDs to scan for this host. The list is never modified and
            never stored; private copies are kept instead.
    """
    known_tlds = glob_known_hosts.get(host)

    if known_tlds is None:
        # First time we see this host. Store and queue separate copies, so that
        # extending the record later changes neither the caller's list nor the
        # job that is already waiting in the queue.
        glob_known_hosts[host] = list(tlds)
        glob_scanpool.put((host, list(tlds)))
        return

    # We cannot remove anything from the queue, so we only add the tlds that are
    # not already going to be scanned.
    remaining_tlds = [tld for tld in tlds if tld not in known_tlds]
    if remaining_tlds:
        glob_scanpool.put((host, remaining_tlds))
        # extend() copies the elements, so the queued list stays independent.
        known_tlds.extend(remaining_tlds)

def scan_wordlist(scanword, wordlist, tld_list):
    """Queue the four combinations of ``scanword`` with every word of ``wordlist``.

    For each word the host names ``<scanword><word>``, ``<scanword>-<word>``,
    ``<word><scanword>`` and ``<word>-<scanword>`` are passed, in that order,
    to ``scan_host`` together with ``tld_list``.
    """
    for word in wordlist:
        candidates = (
            f"{scanword}{word}",
            f"{scanword}-{word}",
            f"{word}{scanword}",
            f"{word}-{scanword}",
        )
        for candidate in candidates:
            scan_host(candidate, tld_list)

class ScanThread(threading.Thread):
    """Worker thread that takes ``(host, tlds)`` jobs from ``glob_scanpool`` and probes them.

    A job is probed by requesting the SOA record of ``host.tld`` for every TLD
    of the job. Hits are logged and written to the ``domains`` table of
    ``<SCANWORD>.db``. The thread ends when it receives the string ``"STOP"``.

    NOTE(review): this class reached review with its indentation and several
    statements missing (``try:`` lines, the SOA field access, the call from
    ``run`` into ``scan_tlds``). Those parts were reconstructed and are marked
    below - confirm them against the repository.
    """

    def __init__(self):
        super().__init__()
        self.busy = False

    def _touch_domain(self, host, tld):
        """Look up the SOA record of ``host.tld``.

        Returns:
            A list of distinct master names (possibly empty), or ``False`` when
            the name does not exist or the lookup or parsing fails.
        """
        fqdn = ".".join([host, tld])
        try:
            soa_records = dns.resolver.resolve(fqdn, "SOA")
        except dns.resolver.NXDOMAIN:
            return False
        except Exception as e:
            # Best effort: any resolver problem counts as "not found", but leave
            # a trace for debugging instead of dropping the error silently.
            logging.debug(f"SOA lookup for {fqdn} failed: {e!r}")
            return False

        # Search the SOA records for master names
        master_names = []
        for soa_record in soa_records.response.answer:
            for rdata in soa_record:
                try:
                    # NOTE(review): reconstructed - `mname` is the primary name
                    # server field of a dnspython SOA rdata; str() makes it
                    # storable in SQLite.
                    master_names.append(str(rdata.mname))
                except Exception as e:
                    logging.debug(f"Unexpected record in SOA answer for {fqdn}: {e!r}")
                    return False

        # Deduplicate, keeping first-seen order so element [0] is deterministic
        return list(dict.fromkeys(master_names))

    def _note_domain(self, host, tld, master_name, accepts_anyhost, first_seen=None, last_seen=None):
        """Insert one found domain into the ``domains`` table.

        Args:
            host: Host name without TLD.
            tld: TLD the host was found on.
            master_name: SOA master name to store.
            accepts_anyhost: Whether a random host name also resolved on this TLD.
            first_seen: Timestamp to store; defaults to the time of this call.
            last_seen: Timestamp to store; defaults to the time of this call.
        """
        # The defaults are resolved per call. Putting `time.time()` directly in
        # the signature would freeze the timestamp at class definition time.
        now = time.time()
        if first_seen is None:
            first_seen = now
        if last_seen is None:
            last_seen = now

        domain_to_insert = (host, tld, master_name, str(first_seen), str(last_seen), accepts_anyhost)
        sql = ("INSERT INTO domains(host,tld,master,first_seen,last_seen,accepts_anyhost) VALUES (?, ?, ?, ?, ?, ?)")

        # Each call uses its own connection because it runs inside a worker thread.
        con = sqlite3.connect(f"{SCANWORD}.db")
        try:
            with con:  # Commits on success, rolls back on error
                con.execute(sql, domain_to_insert)
        finally:
            con.close()

    def scan_tlds(self, to_scan):
        """Probe one job: ``to_scan`` is a ``(host, tlds)`` tuple."""
        global glob_scancounter, glob_found_domains
        host, tlds = to_scan[0], to_scan[1]

        logging.debug(f"Scanning {host} on {tlds}")

        for tld in tlds:
            glob_scancounter += 1
            dns_result = self._touch_domain(host, tld)
            if dns_result:
                logging.warning(f"Found: {host}.{tld} on {dns_result[0]}")
                # If a random host name also resolves on this tld, the hit above
                # says little about the scanned name itself; record that fact.
                accepts_anyhost = bool(self._touch_domain("jdwqnwqqnwdsauuwuwdnakkkasd", tld))
                self._note_domain(host, tld, dns_result[0], accepts_anyhost)
                glob_found_domains += 1

            # NOTE(review): reconstructed - nothing in the reviewed text consumed
            # glob_scan_delay, so the delay tuned by WatchThread had no effect.
            # Pausing after every lookup is assumed; confirm the placement.
            time.sleep(glob_scan_delay)

    def run(self):
        """Process jobs from ``glob_scanpool`` until a ``"STOP"`` marker arrives."""
        while True:
            to_scan = glob_scanpool.get()  # Blocks until item is available
            if to_scan == "STOP":
                logging.info(f"Scan thread {threading.get_ident()} finished")
                break  # Terminate the thread

            # NOTE(review): reconstructed - the reviewed text never handed the
            # job to scan_tlds.
            self.scan_tlds(to_scan)

class WatchThread(threading.Thread):
    """Background thread that tunes ``glob_scan_delay`` and logs scan progress.

    On every tick it compares the number of lookups since the previous tick
    with ``target_scanrate`` and rescales the global delay. Every 30 ticks it
    logs a status summary.

    NOTE(review): this class reached review with its indentation missing and
    the status log statements fused into one line; the ``logging.info`` calls
    and the pause per tick were reconstructed - confirm against the repository.
    """

    MIN_DELAY = 0.1  # Lower bound, so we do not accidentally DDoS somebody
    MAX_DELAY = 20   # Upper bound, so the delay does not oscillate too wildly

    def __init__(self, target_scanrate):
        super().__init__()
        self.target_scanrate = target_scanrate

    def _adjust_delay(self, delay, current_scanrate):
        """Return ``delay`` scaled by measured/target rate, clamped to the bounds."""
        adjustment_factor = current_scanrate / self.target_scanrate
        return min(self.MAX_DELAY, max(self.MIN_DELAY, delay * adjustment_factor))

    def run(self):
        """Loop forever; meant to run as a daemon thread."""
        global glob_scan_delay
        last_scancounter = 0
        i = 0
        while True:
            # Read the shared counter once per tick, so lookups that finish
            # while this iteration runs are counted in the next tick.
            scancounter = glob_scancounter

            # Readjust scan delay (skipped for the first 5 seconds)
            current_scanrate = scancounter - last_scancounter
            if time.time() > START_TIME + 5:
                glob_scan_delay = self._adjust_delay(glob_scan_delay, current_scanrate)

            # Print current status
            if i % 30 == 10 and scancounter > 0:
                domains_to_scan = sum(len(tlds) for tlds in glob_known_hosts.values())
                elapsed = time.time() - START_TIME
                if domains_to_scan > 0 and elapsed > 0:
                    average_rate = scancounter / elapsed
                    # Estimate from the work still open, not from the total, so
                    # the "left" figure shrinks as the scan proceeds.
                    open_lookups = max(domains_to_scan - scancounter, 0)
                    remaining_scantime = round(open_lookups / average_rate / 3600, 2)
                    logging.info("")
                    logging.info(f"Running since {round(elapsed/3600,2)}h, about {remaining_scantime}h left")
                    logging.info(f"Scanned {scancounter} of {domains_to_scan} ({round((scancounter/(domains_to_scan))*100, 2)}%), found {glob_found_domains} domains")
                    logging.info(f"Current scanrate is {current_scanrate} scans/sec, scan-delay is {round(glob_scan_delay,2)}s")
                    logging.info("")

            last_scancounter = scancounter
            i += 1
            # NOTE(review): reconstructed - the rate is reported in scans/sec,
            # so one tick per second is assumed. Without a pause the loop spins
            # at full speed and the measured rate per tick is meaningless.
            time.sleep(1)

# Initialize the tld generator with the optional --tldfile / --forcetlds overrides
tld_gen = TLDGenerator(tldfile=args.tldfile, forcedtlds=args.forcetlds)

# Start all threads
# The watch thread loops forever, so it is a daemon and does not block exit.
watch_thread = WatchThread(args.rate)
watch_thread.daemon = True
watch_thread.start()

# NOTE(review): the body of the worker loop was missing in the reviewed text;
# creating, starting and collecting one ScanThread per requested thread is
# assumed (the shutdown code iterates over `threadpool`) - confirm.
threadpool = []
for _ in range(args.threads):
    scan_thread = ScanThread()
    scan_thread.start()
    threadpool.append(scan_thread)

# ---------------------------------------------------------------------------
# Fill the scan queue according to the selected techniques.
# NOTE(review): this section reached review with its indentation removed and
# several statements truncated. The quoted text after many `if ...:` lines looks
# like the tail of a logging call, and some call arguments are missing. The code
# is left untouched; restore the missing parts from version control.
# ---------------------------------------------------------------------------

# Scan all tlds and known slds
if args.all or args.slds:"Scanning tlds and known slds")

# Split this task into chunks of 10 tlds per queue entry to make it multi-threaded
tlds_to_scan = tld_gen.generate_tlds("all_tlds_incl_slds")
for i in range(0, len(tlds_to_scan), 10):
scan_host(SCANWORD, tlds_to_scan[i:i+10])

# Scan all tlds (an `elif`: only reached when neither --all nor --slds is set)
elif args.tlds:"Scanning tlds")

# Split this task into chunks of 10 tlds per queue entry to make it multi-threaded
tlds_to_scan = tld_gen.generate_tlds("all_tlds")
for i in range(0, len(tlds_to_scan), 10):
scan_host(SCANWORD, tlds_to_scan[i:i+10])

# Scan for character replacement and addition squatting
if args.all or args.chars:"Scanning simple char replacements")

# The unmodified scan word is skipped; tlds come from the CHARS config section (default list "abused")
for host in generate_char_simple(SCANWORD):
if host != SCANWORD:
scan_host(host, tld_gen.generate_tlds(config["CHARS"].get("TLDs", "abused")))

# Scan homoglyphs
if args.all or args.homo:"Scanning homoglyphs")

for host in generate_homoglyphs(SCANWORD):
scan_host(host, tld_gen.generate_tlds(config["HOMO"].get("TLDs", "abused")))

# Scan for all country codes
# NOTE(review): as shown, the generate_tlds() result is discarded; presumably it was the
# last argument of a scan_wordlist(...) call whose first lines are missing - confirm.
if args.all or args.ccodes:"Scanning country codes")
tld_gen.generate_tlds(config["CCODES"].get("TLDs", "abused"))

# Scan often-used phishing wordlist
# NOTE(review): same as above - the call that consumes this tld list is not visible.
if args.all or args.phishing:"Scanning phishing wordlist")
tld_gen.generate_tlds(config["PHISHING"].get("TLDs", "abused"))

# Scan numbers
if args.all or args.numbers:"Scanning numbers")

for host in generate_numbers(SCANWORD):
scan_host(host, tld_gen.generate_tlds(config["NUMBERS"].get("TLDs", "abused")))

# Scan additional wordlists (whitespace-separated paths in the `Wordlists` config value)
# NOTE(review): `wordlist_path` is never used in the visible loop body; presumably the file was
# loaded with load_wordlist_file() and passed to scan_wordlist() - confirm.
if args.all or args.wordlist:"Scanning supplied wordlist")
for wordlist_path in config["WORDLIST"].get("Wordlists", "").split():
tld_gen.generate_tlds(config["WORDLIST"].get("TLDs", "abused"))

# Scan wikipedia wordlists
# NOTE(review): the condition below is cut off after `or`.
if args.all or
# Generate and scan related wordlist: terms come from --wikiterms if given
# NOTE(review): as shown, `rt` is reassigned from the config unconditionally; an `else:` between
# the two assignments seems to be missing.
if args.wikiterms:
rt = args.wikiterms
rt = config["WIKI"].get("Terms", "").split()"Generating wikipedia wordlist of the related terms {', '.join(rt)}")

# NOTE(review): logging.warn is a deprecated alias of logging.warning.
if rt == []:
logging.warn("Not scanning wikipedia wordlist, since no terms were supplied")
else:"Scanning generated wikipedia wordlist")

# Accumulate a relevance score per term over all requested articles
# NOTE(review): as shown, the `+=` is immediately overwritten by the plain assignment; an
# `else:` between the two lines seems to be missing.
related_terms = {}
for r in rt:
for term, relevance in generate_wikipedia_wordlist(config["WIKI"].get("Language", "en"), r):
if term in related_terms:
related_terms[term] += relevance
related_terms[term] = relevance

# Keep only the `Count` highest-scoring (term, relevance) pairs (default 750)
sorted_related_terms = sorted(related_terms.items(), key=lambda x: x[1], reverse=True)[:config["WIKI"].getint("Count", 750)]

# NOTE(review): the two lines below look like the trailing arguments of a call (presumably
# scan_wordlist) whose first line is missing. The tld list is read from the WORDLIST section
# with default "top5", not from WIKI - confirm this is intended.
map(lambda x: x[0], sorted_related_terms),
tld_gen.generate_tlds(config["WORDLIST"].get("TLDs", "top5"))

# Everything is queued now: report the total number of host/tld combinations.
logging.warning(f"Scanning {sum(len(tlds) for tlds in glob_known_hosts.values())} domains...")

# One marker per scan thread; each thread stops after taking exactly one.
for _ in range(args.threads):
    glob_scanpool.put("STOP")  # Scan threads terminate when fetching this signal

# Wait until every scan thread has drained its share of the queue.
# NOTE(review): the loop body was missing in the reviewed text; joining the
# thread is assumed from the "All scans finished" message that follows.
for t in threadpool:
    t.join()

logging.warning("All scans finished")
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
dnspython >= 2.2.0
requests >= 2.27.1

0 comments on commit 2b8b6eb

Please sign in to comment.