From 2b8b6ebfb5e7ab48e465cef9254a76bb3ef1ee30 Mon Sep 17 00:00:00 2001
From: Stefan Wieczorek
Date: Mon, 7 Feb 2022 21:56:38 +0100
Subject: [PATCH] Add monodon, a multi-threaded domain-squatting scanner

---
 config.ini                  |  42 +++++
 dump_db.py                  |  25 +++
 monodon.py                  | 309 ++++++++++++++++++++++++++++++++++++
 requirements.txt            |   2 +
 tlds/abused.txt             |  24 +++
 tlds/top15.txt              |  15 ++
 tlds/top5.txt               |   5 +
 utils/parser_checks.py      |  15 ++
 utils/squat_generator.py    | 130 +++++++++++++++
 utils/tld_generator.py      |  72 +++++++++
 utils/utils.py              |   3 +
 utils/wikipedia_wordlist.py |  30 ++++
 wordlists/country_codes.txt |  18 +++
 wordlists/phishing.txt      |  59 +++++++
 wordlists/similar_chars.txt | 107 +++++++++++++
 15 files changed, 856 insertions(+)
 create mode 100644 config.ini
 create mode 100755 dump_db.py
 create mode 100755 monodon.py
 create mode 100644 requirements.txt
 create mode 100644 tlds/abused.txt
 create mode 100644 tlds/top15.txt
 create mode 100644 tlds/top5.txt
 create mode 100644 utils/parser_checks.py
 create mode 100644 utils/squat_generator.py
 create mode 100644 utils/tld_generator.py
 create mode 100644 utils/utils.py
 create mode 100644 utils/wikipedia_wordlist.py
 create mode 100644 wordlists/country_codes.txt
 create mode 100644 wordlists/phishing.txt
 create mode 100644 wordlists/similar_chars.txt

diff --git a/config.ini b/config.ini
new file mode 100644
index 0000000..5cf82a4
--- /dev/null
+++ b/config.ini
@@ -0,0 +1,42 @@
+# In all sections, TLD definitions can either be explicit TLDs or one of the supplied TLD lists.
+# These lists are: all_tlds, all_tlds_incl_slds, abused, top5, top15
+
+[CHARS]
+# TLDs = abused
+
+
+[HOMO]
+# TLDs = abused
+
+
+[CCODES]
+# TLDs = abused
+
+
+[PHISHING]
+# TLDs = abused
+
+
+[NUMBERS]
+# TLDs = abused
+
+
+[WIKI]
+# TLDs = top5
+
+# Which Wikipedia articles should be used to generate the wordlist
+# Terms = cyber computer hacker malware software tiger
+
+# How many of these related terms should be scanned
+# Count = 750
+
+# Which Wikipedia language version should be used
+# Language = en
+
+
+[WORDLIST]
+# TLDs = abused
+
+# Additional wordlists - please specify the complete path
+# Wordlists = /home/fant/wordlist/animals.txt
+
diff --git a/dump_db.py b/dump_db.py
new file mode 100755
index 0000000..cffef36
--- /dev/null
+++ b/dump_db.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+
+import argparse
+import sqlite3
+
+parser = argparse.ArgumentParser(description="Dump the domain-squatting database")
+parser.add_argument("dbfile", type=str, help="Squatting database to load")
+parser.add_argument("--filter", nargs="+", type=str, help="Exclude rows whose master name contains any of these strings")
+args = parser.parse_args()
+
+# Open the database and print every non-filtered row as "host.tld<TAB>master"
+con = sqlite3.connect(args.dbfile)
+cur = con.cursor()
+for row in cur.execute("SELECT * FROM domains"):
+    filtered = False
+    if args.filter:
+        for f in args.filter:
+            if f in row[2]:
+                filtered = True
+                break
+
+    if not filtered:
+        print(f"{row[0]}.{row[1]}\t{row[2]}")
+
+con.close()
\ No newline at end of file
diff --git a/monodon.py b/monodon.py
new file mode 100755
index 0000000..e59d4c8
--- /dev/null
+++ b/monodon.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+
+import dns.resolver
+import string
+import time
+import copy
+import sys
+import argparse
+import configparser
+import sqlite3
+import queue
+import logging
+import threading
+
+from utils.parser_checks import parser_check_rate, parser_check_threads
+from utils.squat_generator import generate_char_simple, generate_homoglyphs, generate_numbers
+from utils.wikipedia_wordlist import generate_wikipedia_wordlist
+from utils.tld_generator import TLDGenerator
+from utils.utils import dedup
+
+URL_CHARS = list(string.ascii_lowercase) + list(string.digits) + ["-", "ä", "ö", "ü"]
+START_TIME = time.time()
+
+parser = argparse.ArgumentParser(description="Search for possible squatting domains")
+parser.add_argument("scanword", type=str, help="Which domain name / word to scan (without the TLD)")
+parser.add_argument("--config", type=str, default="config.ini", help="Config file to use")
+parser.add_argument("--all", default=False, action='store_true', help="Execute all scanning techniques")
+parser.add_argument("--tlds", default=False, action='store_true', help="Scan all TLDs")
+parser.add_argument("--slds", default=False, action='store_true', help="Scan all TLDs and known SLDs")
+parser.add_argument("--homo", default=False, action='store_true', help="Scan homoglyphs")
+parser.add_argument("--chars", default=False, action='store_true', help="Scan character replacements and additions")
+parser.add_argument("--numbers", default=False, action='store_true', help="Iterate numbers in the domain name")
+parser.add_argument("--phishing", default=False, action='store_true', help="Scan phishing wordlist")
+parser.add_argument("--ccodes", default=False, action='store_true', help="Scan two-letter country codes")
+parser.add_argument("--wiki", default=False, action='store_true', help="Scan Wikipedia-generated related word lists")
+parser.add_argument("--wikiterms", type=str, default=None, nargs="+", help="Wikipedia terms to scan instead of the terms from config.ini")
+parser.add_argument("--wordlist", default=False, action='store_true', help="Scan wordlists defined in the config file")
+parser.add_argument("--forcetlds", type=str, default=None, nargs="+", help="Override the scan TLDs set in the config.ini file")
+parser.add_argument("--tldfile", type=str, default=None, nargs="?", help="Instead of downloading a fresh copy from publicsuffix.org, use this file as the list of all TLDs and SLDs")
+parser.add_argument("--threads", type=parser_check_threads, default=5, help="Number of scan threads to start")
+parser.add_argument("--rate", type=parser_check_rate, default=10, help="Scans per second to aim for")
+
+args = parser.parse_args()
+
+config = configparser.ConfigParser()
+config.read(args.config)
+
+SCANWORD = args.scanword.lower()
+glob_scancounter = 0
+glob_found_domains = 0
+glob_scan_delay = 1.0
+glob_scanpool = queue.SimpleQueue()
+glob_known_hosts = {}
+
+con = sqlite3.connect(f"{SCANWORD}.db")
+cur = con.cursor()
+cur.execute("CREATE TABLE IF NOT EXISTS domains (host text, tld text, master text, first_seen text, last_seen text, accepts_anyhost bool)")
+con.commit()
+con.close()
+
+# Setup logging
+logging.basicConfig(format="%(asctime)s %(message)s", level=logging.INFO)
+
+def load_wordlist_file(filename):
+    words = []
+    with open(filename, "r") as f:
+        for line in f.readlines():
+            if line.startswith("#"): continue  # Skip comment lines
+            words += line.lower().split()
+
+    return dedup(words)
+
+def scan_host(host, tlds):
+    global glob_known_hosts, glob_scanpool
+    if host in glob_known_hosts:
+        # We cannot remove anything from the queue, so we only enqueue the TLDs that are not already scheduled
+        remaining_tlds = [tld for tld in tlds if tld not in glob_known_hosts[host]]
+        if len(remaining_tlds) > 0:
+            glob_scanpool.put((host, remaining_tlds))
+            glob_known_hosts[host] += remaining_tlds
+    else:
+        glob_known_hosts[host] = list(tlds)  # Copy, so the += above never mutates the caller's list
+        glob_scanpool.put((host, tlds))
+
+
+def scan_wordlist(scanword, wordlist, tld_list):
+    for word in wordlist:
+        scan_host(f"{scanword}{word}", tld_list)
+        scan_host(f"{scanword}-{word}", tld_list)
+        scan_host(f"{word}{scanword}", tld_list)
+        scan_host(f"{word}-{scanword}", tld_list)
+
+
+class ScanThread(threading.Thread):
+    def _touch_domain(self, host, tld):
+        try:
+            soa_records = dns.resolver.resolve(".".join([host, tld]), "SOA")
+        except dns.resolver.NXDOMAIN:
+            return False
+        except Exception:
+            return False
+
+        # Search the SOA records for master names
+        master_names = []
+        for soa_record in soa_records.response.answer:
+            for rdata in soa_record:
+                try:
+                    master_names.append(rdata.mname.to_text())
+                except Exception:
+                    return False
+
+        return list(set(master_names))  # Deduplicate
+
+    def _note_domain(self, host, tld, master_name, accepts_anyhost, first_seen=None, last_seen=None):
+        # Default timestamps are taken at call time, not at function definition time
+        first_seen = time.time() if first_seen is None else first_seen
+        last_seen = time.time() if last_seen is None else last_seen
+
+        con = sqlite3.connect(f"{SCANWORD}.db")
+        domain_to_insert = (host, tld, master_name, str(first_seen), str(last_seen), accepts_anyhost)
+        sql = "INSERT INTO domains(host,tld,master,first_seen,last_seen,accepts_anyhost) VALUES (?, ?, ?, ?, ?, ?)"
+        con.execute(sql, domain_to_insert)
+        con.commit()
+        con.close()
+
+
+    def scan_tlds(self, to_scan):
+        global glob_scancounter, glob_scan_delay, glob_found_domains
+        host = to_scan[0]
+
+        logging.debug(f"Scanning {to_scan[0]} on {to_scan[1]}")
+
+        for tld in to_scan[1]:
+            glob_scancounter += 1
+            dns_result = self._touch_domain(host, tld)
+            if dns_result:
+                logging.warning(f"Found: {host}.{tld} on {dns_result[0]}")
+                # If a clearly nonexistent host also resolves, the zone answers for any host (wildcard)
+                accepts_anyhost = bool(self._touch_domain("jdwqnwqqnwdsauuwuwdnakkkasd", tld))
+                self._note_domain(host, tld, dns_result[0], accepts_anyhost)
+                glob_found_domains += 1
+
+            time.sleep(glob_scan_delay)
+
+
+    def run(self):
+        global glob_scan_delay, glob_scanpool
+        while True:
+            to_scan = glob_scanpool.get()  # Blocks until an item is available
+            if to_scan == "STOP":
+                logging.info(f"Scan thread {threading.get_ident()} finished")
+                break  # Terminate the thread
+            else:
+                self.scan_tlds(to_scan)
+
+
+    def __init__(self):
+        super(ScanThread, self).__init__()
+        self.busy = False
+
+
+class WatchThread(threading.Thread):
+    def run(self):
+        global glob_scan_delay, glob_scancounter, glob_scanpool, glob_known_hosts, glob_found_domains, START_TIME
+        last_scancounter = 0
+        i = 0
+        while True:
+            # Readjust the scan delay
+            current_scanrate = glob_scancounter - last_scancounter
+            if time.time() > START_TIME + 5:
+                adjustment_factor = current_scanrate / self.target_scanrate
+                glob_scan_delay *= adjustment_factor
+                glob_scan_delay = max(0.1, glob_scan_delay)  # Make sure we do not accidentally DDoS somebody
+                glob_scan_delay = min(20, glob_scan_delay)  # Make sure the delay does not oscillate too wildly
+
+            # Print the current status
+            if i % 30 == 10 and glob_scancounter > 0:
+                domains_to_scan = sum(map(lambda x: len(x), glob_known_hosts.values()))
+                remaining_scantime = round(domains_to_scan / (glob_scancounter / (time.time() - START_TIME)) / 3600, 2)
+                logging.info("")
+                logging.info(f"Running since {round((time.time()-START_TIME)/3600, 2)}h, about {remaining_scantime}h left")
+                logging.info(f"Scanned {glob_scancounter} of {domains_to_scan} ({round((glob_scancounter/domains_to_scan)*100, 2)}%), found {glob_found_domains} domains")
+                logging.info(f"Current scanrate is {current_scanrate} scans/sec, scan delay is {round(glob_scan_delay, 2)}s")
+                logging.info("")
+
+            last_scancounter = copy.copy(glob_scancounter)
+            i += 1
+            time.sleep(1)
+
+    def __init__(self, target_scanrate):
+        super(WatchThread, self).__init__()
+        self.target_scanrate = target_scanrate
+
+
+tld_gen = TLDGenerator(tldfile=args.tldfile, forcedtlds=args.forcetlds)  # Initialize the TLD generator
+
+# Start all threads
+watch_thread = WatchThread(args.rate)
+watch_thread.daemon = True
+watch_thread.start()
+
+threadpool = []
+for i in range(0, args.threads):
+    threadpool.append(ScanThread())
+    threadpool[-1].start()
+
+# Scan all TLDs and known SLDs
+if args.all or args.slds:
+    logging.info("Scanning TLDs and known SLDs")
+
+    # Split this task into smaller chunks to make it multi-threaded
+    tlds_to_scan = tld_gen.generate_tlds("all_tlds_incl_slds")
+    for i in range(0, len(tlds_to_scan), 10):
+        scan_host(SCANWORD, tlds_to_scan[i:i+10])
+
+# Scan all TLDs
+elif args.tlds:
+    logging.info("Scanning TLDs")
+
+    # Split this task into smaller chunks to make it multi-threaded
+    tlds_to_scan = tld_gen.generate_tlds("all_tlds")
+    for i in range(0, len(tlds_to_scan), 10):
+        scan_host(SCANWORD, tlds_to_scan[i:i+10])
+
+# Scan for character replacement and addition squatting
+if args.all or args.chars:
+    logging.info("Scanning simple char replacements")
+
+    for host in generate_char_simple(SCANWORD):
+        if host != SCANWORD:
+            scan_host(host, tld_gen.generate_tlds(config["CHARS"].get("TLDs", "abused")))
+
+# Scan homoglyphs
+if args.all or args.homo:
+    logging.info("Scanning homoglyphs")
+
+    for host in generate_homoglyphs(SCANWORD):
+        scan_host(host, tld_gen.generate_tlds(config["HOMO"].get("TLDs", "abused")))
+
+# Scan for all country codes
+if args.all or args.ccodes:
+    logging.info("Scanning country codes")
+    scan_wordlist(
+        SCANWORD,
+        load_wordlist_file("wordlists/country_codes.txt"),
+        tld_gen.generate_tlds(config["CCODES"].get("TLDs", "abused"))
+    )
+
+# Scan an often-used phishing wordlist
+if args.all or args.phishing:
+    logging.info("Scanning phishing wordlist")
+    scan_wordlist(
+        SCANWORD,
+        load_wordlist_file("wordlists/phishing.txt"),
+        tld_gen.generate_tlds(config["PHISHING"].get("TLDs", "abused"))
+    )
+
+# Scan numbers
+if args.all or args.numbers:
+    logging.info("Scanning numbers")
+
+    for host in generate_numbers(SCANWORD):
+        scan_host(host, tld_gen.generate_tlds(config["NUMBERS"].get("TLDs", "abused")))
+
+# Scan additional wordlists
+if args.all or args.wordlist:
+    logging.info("Scanning supplied wordlists")
+    for wordlist_path in config["WORDLIST"].get("Wordlists", "").split():
+        scan_wordlist(
+            SCANWORD,
+            load_wordlist_file(wordlist_path),
+            tld_gen.generate_tlds(config["WORDLIST"].get("TLDs", "abused"))
+        )
+
+# Scan Wikipedia wordlists
+if args.all or args.wiki:
+    # Generate and scan a wordlist of related terms
+    if args.wikiterms:
+        rt = args.wikiterms
+    else:
+        rt = config["WIKI"].get("Terms", "").split()
+
+    if rt == []:
+        logging.warning("Not scanning Wikipedia wordlist, since no terms were supplied")
+    else:
+        logging.info(f"Generating Wikipedia wordlist of the related terms {', '.join(rt)}")
+
+        related_terms = {}
+        for r in rt:
+            for term, relevance in generate_wikipedia_wordlist(config["WIKI"].get("Language", "en"), r):
+                if term in related_terms:
+                    related_terms[term] += relevance
+                else:
+                    related_terms[term] = relevance
+
+        sorted_related_terms = sorted(related_terms.items(), key=lambda x: x[1], reverse=True)[:config["WIKI"].getint("Count", 750)]
+
+        logging.info("Scanning generated Wikipedia wordlist")
+        scan_wordlist(
+            SCANWORD,
+            map(lambda x: x[0], sorted_related_terms),
+            tld_gen.generate_tlds(config["WIKI"].get("TLDs", "top5"))
+        )
+
+logging.warning(f"Scanning {sum(map(lambda x: len(x), glob_known_hosts.values()))} domains...")
+
+for i in range(0, args.threads):
+    glob_scanpool.put("STOP")  # Scan threads terminate when fetching this signal
+
+for t in threadpool:
+    t.join()
+
+logging.warning("All scans finished")
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..94b497d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+dnspython >= 2.2.0
+requests >= 2.27.1
\ No newline at end of file
diff --git a/tlds/abused.txt b/tlds/abused.txt
new file mode 100644
index 0000000..256c7ec
--- /dev/null
+++ b/tlds/abused.txt
@@ -0,0 +1,24 @@
+eu
+cn
+de
+ga
+gq
+ml
+cf
+cc
+tk
+app
+ooo
+xyz
+top
+fit
+com
+net
+org
+pro
+info
+site
+work
+rest
+buzz
+online
\ No newline at end of file
diff --git a/tlds/top15.txt b/tlds/top15.txt
new file mode 100644
index 0000000..2d56fbc
--- /dev/null
+++ b/tlds/top15.txt
@@ -0,0 +1,15 @@
+com
+net
+org
+de
+icu
+uk
+ru
+info
+top
+xyz
+tk
+cn
+ga
+cf
+nl
\ No newline at end of file
diff --git a/tlds/top5.txt b/tlds/top5.txt
new file mode 100644
index 0000000..8b6d317
--- /dev/null
+++ b/tlds/top5.txt
@@ -0,0 +1,5 @@
+com
+net
+org
+de
+ru
diff --git a/utils/parser_checks.py b/utils/parser_checks.py
new file mode 100644
index 0000000..7c6b33f
--- /dev/null
+++ b/utils/parser_checks.py
@@ -0,0 +1,15 @@
+import argparse
+
+def parser_check_threads(value):
+    value = int(value)
+    if value <= 0:
+        raise argparse.ArgumentTypeError(f"Invalid thread count: {value}")
+    elif value > 64:
+        raise argparse.ArgumentTypeError("Maximum thread count is 64")
+    return value
+
+def parser_check_rate(value):
+    value = int(value)
+    if value <= 0:
+        raise argparse.ArgumentTypeError(f"Invalid target rate: {value}")
+    return value
\ No newline at end of file
diff --git a/utils/squat_generator.py b/utils/squat_generator.py
new file mode 100644
index 0000000..13af15c
--- /dev/null
+++ b/utils/squat_generator.py
@@ -0,0 +1,130 @@
+import string
+
+def generate_char_simple(scanword):
+    # Character omission
+    for i in range(len(scanword)):
+        yield scanword[:i] + scanword[i+1:]
+
+    # Character repeat
+    for i in range(len(scanword)):
+        yield scanword[:i] + scanword[i]*2 + scanword[i+1:]
+
+    # Swap adjacent characters
+    for i in range(len(scanword)-1):
+        yield scanword[:i] + scanword[i+1] + scanword[i] + scanword[i+2:]
+
+    # Insert dashes
+    for i in range(1, len(scanword)-1):
+        yield scanword[:i] + "-" + scanword[i:]
+
+    # Replace one character with a dash
+    for i in range(1, len(scanword)-1):
+        yield scanword[:i] + "-" + scanword[i+1:]
+
+    # Replace each character once with every letter and digit
+    for i in range(len(scanword)):
+        for repl in string.ascii_lowercase + string.digits:
+            yield scanword[:i] + repl + scanword[i+1:]
+
+    # Insert one char
+    for i in range(len(scanword)+1):
+        for repl in string.ascii_lowercase + string.digits:
+            yield scanword[:i] + repl + scanword[i:]
+
+def _load_homoglyphs(similar_chars_file):
+    # Currently only loads important homoglyphs marked with "!"
+
+    homoglyphs = {}
+
+    with open(similar_chars_file, "r") as f:
+        for line in f.readlines():
+            line = line.strip()
+            if "#" in line or line == "" or "!" not in line: continue  # Skip comments, empty and unmarked lines
+
+            # Build the index
+            for char in line.split():
+                if char not in homoglyphs and char != "!":
+                    homoglyphs[char] = []
+
+            # Add all chars
+            for char_index in line.split():
+                for char_add in line.split():
+                    if char_index != char_add and char_index != "!" and char_add != "!":
+                        homoglyphs[char_index].append(char_add)
+
+    return homoglyphs
+
+
+def _count_up(combination_bitmask, current_bitmask):
+    # Counts the mixed-radix number encoded in current_bitmask up to
+    # combination_bitmask, yielding every intermediate combination
+    while combination_bitmask != current_bitmask:
+        for i in range(len(combination_bitmask)):
+            if current_bitmask[i] < combination_bitmask[i]:
+                # Increase the current index and zero all positions before it
+                current_bitmask = [0]*i + [current_bitmask[i]+1] + current_bitmask[i+1:]
+                yield current_bitmask
+                break
+
+
+def generate_homoglyphs(scanword):
+    homoglyphs = _load_homoglyphs("wordlists/similar_chars.txt")
+    homoglyph_tree = []
+
+    # Build a 2D tree of possible replacements
+    for char in scanword:
+        if char in homoglyphs:
+            homoglyph_tree.append([char] + homoglyphs[char])
+        else:
+            homoglyph_tree.append([char])
+
+    # Build a bitmask of how many replacements exist per char
+    combination_bitmask = []
+    for char in homoglyph_tree:
+        combination_bitmask.append(len(char)-1)
+
+    # Iterate all combinations
+    current_bitmask = [0]*len(combination_bitmask)
+
+    for current_bitmask in _count_up(combination_bitmask, current_bitmask):
+        out = ""
+        for i in range(len(current_bitmask)):
+            out += homoglyph_tree[i][current_bitmask[i]]
+        yield out
+
+
+def _iterate_numbers(number_tree):
+    # NOTE: unfinished draft of a segment-wise number iterator; it is not
+    # called anywhere - generate_numbers() below implements the actual behavior
+    current_number_tree = []
+
+    for char in number_tree:
+        if type(char) == int:
+            current_number_tree.append(0)
+        else:
+            current_number_tree.append(char)
+
+    for i, char in enumerate(current_number_tree):
+        if type(char) == str:
+            continue
+
+        elif type(char) == int:
+            if char < int("9"*len(number_tree[i])):
+                current_number_tree = current_number_tree[:i]
+
+
+def generate_numbers(scanword):
+    # Count the digits in the word
+    contained_numbers = 0
+    for char in scanword:
+        if char in string.digits:
+            contained_numbers += 1
+
+    if contained_numbers > 0:
+        # Iterate all digit strings of that length, e.g. 00-99 for two digits
+        for i in range(0, int("9"*contained_numbers)+1):
+            current_number = str(i).zfill(contained_numbers)
+
+            outword = ""
+            index = 0
+            for char in scanword:
+                if char in string.digits:
+                    outword += current_number[index]
+                    index += 1
+                else:
+                    outword += char
+
+            yield outword
\ No newline at end of file
diff --git a/utils/tld_generator.py b/utils/tld_generator.py
new file mode 100644
index 0000000..ee4ceae
--- /dev/null
+++ b/utils/tld_generator.py
@@ -0,0 +1,72 @@
+import logging
+import requests
+
+class TLDGenerator():
+    def _load_tld_file(self, tld_file):
+        with open(tld_file, "r") as f:
+            return self._load_raw_tlds(f.read())
+
+    def _load_raw_tlds(self, raw_tld_list):
+        # Loads a list of top- or n-th-level domains
+        returnlist = []
+
+        for line in raw_tld_list.split("\n"):
+            line = line.strip()  # Trim whitespace
+            if line.startswith("/"): continue  # Ignore comments
+            elif line == "": continue  # Ignore empty lines
+
+            if line.startswith("*"): line = line[1:]  # Remove leading wildcards
+            if line.startswith("!"): line = line[1:]  # Remove leading exclamation marks
+
+            # Remove all leading dots
+            while line.startswith("."):
+                line = line[1:]
+
+            returnlist.append(line.lower().strip())
+
+        return returnlist
+
+    def generate_tlds(self, configuration_string):
+        out_tlds = []
+
+        if self.forcedtlds:
+            return self.forcedtlds
+
+        desired_tlds = configuration_string.split()
+        for desired_tld in desired_tlds:
+            if desired_tld == "all_tlds":
+                out_tlds += self.ALL_TLDS
+            elif desired_tld == "all_tlds_incl_slds":
+                out_tlds += self.ALL_TLDS_INCL_SLDS
+            elif desired_tld == "abused":
+                out_tlds += self.ABUSED_TLDS
+            elif desired_tld == "top5":
+                out_tlds += self.TOP5_TLDS
+            elif desired_tld == "top15":
+                out_tlds += self.TOP15_TLDS
+            elif desired_tld in self.ALL_TLDS_INCL_SLDS:
+                out_tlds.append(desired_tld)
+            else:
+                logging.warning(f"Top-level domain .{desired_tld} is not public; check for a typo. Scanning it anyway.")
+                out_tlds.append(desired_tld)
+
+        return out_tlds
+
+    def __init__(self, tldfile=None, forcedtlds=None):
+        self.forcedtlds = forcedtlds
+
+        if tldfile:
+            self.ALL_TLDS_INCL_SLDS = self._load_tld_file(tldfile)
+        elif forcedtlds:
+            return  # No need to load any files, domains are forced anyway
+        else:
+            raw_all_slds = requests.get("https://publicsuffix.org/list/public_suffix_list.dat").text
+            self.ALL_TLDS_INCL_SLDS = self._load_raw_tlds(raw_all_slds)
+            logging.info(f"Loaded {len(self.ALL_TLDS_INCL_SLDS)} domains from publicsuffix.org")
+
+        # Filter for a list that only contains TLDs (no dots)
+        self.ALL_TLDS = list(filter(lambda x: "." not in x, self.ALL_TLDS_INCL_SLDS))
+
+        # Load the abused, top5 and top15 TLD lists
+        self.ABUSED_TLDS = self._load_tld_file("tlds/abused.txt")
+        self.TOP5_TLDS = self._load_tld_file("tlds/top5.txt")
+        self.TOP15_TLDS = self._load_tld_file("tlds/top15.txt")
\ No newline at end of file
diff --git a/utils/utils.py b/utils/utils.py
new file mode 100644
index 0000000..ab412af
--- /dev/null
+++ b/utils/utils.py
@@ -0,0 +1,3 @@
+# Order-preserving list deduplication
+def dedup(list_to_dedup):
+    return list(dict.fromkeys(list_to_dedup))
\ No newline at end of file
diff --git a/utils/wikipedia_wordlist.py b/utils/wikipedia_wordlist.py
new file mode 100644
index 0000000..61af0fb
--- /dev/null
+++ b/utils/wikipedia_wordlist.py
@@ -0,0 +1,30 @@
+import requests
+import string
+
+def generate_wikipedia_wordlist(language_code, searchterm):
+    # Passing the searchterm via params ensures it is properly URL-encoded
+    r = requests.get(
+        f"https://{language_code}.wikipedia.org/w/api.php",
+        params={"action": "query", "format": "json", "titles": searchterm, "prop": "extracts", "explaintext": 1}
+    )
+    extract = list(r.json()["query"]["pages"].values())[0]["extract"]
+
+    relevant_words = {}
+
+    for word in extract.split():
+        word = word.strip().lower()
+
+        if len(word) <= 2: continue
+
+        # Skip words containing characters that cannot appear in a hostname
+        invalid_char = False
+        for letter in word:
+            if letter not in list(string.ascii_letters) + list(string.digits) + ["-", "ä", "ü", "ö"]:
+                invalid_char = True
+                break
+        if invalid_char: continue
+
+        if word in relevant_words:
+            relevant_words[word] += 1
+        else:
+            relevant_words[word] = 1
+
+    # Most frequent words first
+    return sorted(relevant_words.items(), key=lambda x: x[1], reverse=True)
\ No newline at end of file
diff --git a/wordlists/country_codes.txt b/wordlists/country_codes.txt
new file mode 100644
index 0000000..3fc7408
--- /dev/null
+++ b/wordlists/country_codes.txt
@@ -0,0 +1,18 @@
+# Country codes most used for domain squatting
+DE
+US
+FR
+GB
+NL
+AU
+CN
+HK
+TW
+KR
+CA
+BE
+ES
+SE
+CH
+TR
+AT
\ No newline at end of file
diff --git a/wordlists/phishing.txt b/wordlists/phishing.txt
new file mode 100644
index 0000000..e66cd96
--- /dev/null
+++ b/wordlists/phishing.txt
@@ -0,0 +1,59 @@
+access
+account
+admin
+auth
+authentication
+blue
+business
+calculation
+cdn
+claim
+click
+company
+connect
+copy
+delivery
+dhl
+document
+download
+fedex
+find
+group
+http
+https
+https-www
+http-www
+inside
+invoice
+label
+local
+login
+mail
+market
+marketplace
+mobile
+my
+online
+portal
+red
+register
+safe
+secure
+security
+service
+signin
+signup
+ssl
+support
+system
+ticket
+update
+user
+verification
+verify
+view
+web
+world
+worldwide
+ww
+www
\ No newline at end of file
diff --git a/wordlists/similar_chars.txt b/wordlists/similar_chars.txt
new file mode 100644
index 0000000..6422f89
--- /dev/null
+++ b/wordlists/similar_chars.txt
@@ -0,0 +1,107 @@
+# Homograph glyph list
+# The original letter comes first
+# ! marks an important swap
+
+# ASCII similarities
+! 1 l I
+
+# ASCII multi-letter similarities
+! m rn
+! A fi
+! d cl
+! w vv uu
+
+# L33tspeak
+! o 0
+! e 3
+! a 4
+! b 8
+
+# Phonetic similarities
+! sch sh
+! ss ß
+! s z
+! d t
+! q c
+! o u
+
+# German letters
+! a ä
+! o ö
+! u ü
+
+# Cyrillic lower letters (second letter is Cyrillic)
+a а
+c с
+e е
+o о
+p р
+x х
+y у
+3 З
+4 Ч
+6 б
+
+і i
+ј j
+ԛ q
+ѕ s
+ԝ w
+ä ӓ
+e ё
+i ї
+ö ӧ
+
+
+# Cyrillic upper letters (second letter is Cyrillic)
+A А
+B В
+C С
+E Е
+H Н
+I І
+J Ј
+K К
+M М
+O О
+P Р
+S Ѕ
+T Т
+X Х
+
+Y Y
+F Ғ
+G Ԍ
+
+
+# Greek lower letters (second letter is Greek)
+o ο
+v ν
+a α
+e ε
+i ι
+k κ
+n η
+p ρ
+t τ
+u υ
+w ω
+x χ
+y γ
+
+
+# Greek upper letters (second letter is Greek)
+A Α
+B Β
+E Ε
+H Η
+I Ι
+K Κ
+M Μ
+N Ν
+O Ο
+P Ρ
+T Τ
+X Χ
+Y Υ
+Z Ζ
\ No newline at end of file
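
As a quick smoke test of the generators introduced by this patch — a minimal sketch, not part of the patch itself; the scanword "paypal" and the flag values below are illustrative — the squat generators can be exercised offline from the repository root:

    #!/usr/bin/env python3
    # Enumerate typo candidates for one word (no network traffic involved)
    from utils.squat_generator import generate_char_simple, generate_homoglyphs

    candidates = list(generate_char_simple("paypal"))
    print(len(candidates), candidates[:5])

    # Homoglyph variants; reads wordlists/similar_chars.txt relative to the cwd
    for variant in list(generate_homoglyphs("paypal"))[:5]:
        print(variant)

A full scan combining several techniques would then look like: ./monodon.py paypal --chars --homo --rate 20, with results collected in paypal.db and readable via ./dump_db.py paypal.db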