combo.py
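# Builds iam-py-test's Combo List: downloads several upstream filter lists,
# drops comments, blanks, duplicates, allowlisted and dead entries, resolves
# !#include directives, and writes the merged list plus domain and status files.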
import requests
import os
import urllib.parse
import re
import datetime
import publicsuffixlist
psl = publicsuffixlist.PublicSuffixList()
LIST_FILENAME = "list.txt"
STATUS_FILENAME = "status.txt"
DOMAIN_FILENAME = "domains.txt"
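# Source filter lists to merge, mapped list name -> raw download URL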
lists = {
"Dandelion Sprout's Anti-Malware List":"https://raw.githubusercontent.com/DandelionSprout/adfilt/master/Dandelion%20Sprout's%20Anti-Malware%20List.txt",
"The malicious website blocklist":"https://raw.githubusercontent.com/iam-py-test/my_filters_001/main/Alternative%20list%20formats/antimalware_lite.txt",
"iam-py-test's antitypo list":"https://raw.githubusercontent.com/iam-py-test/my_filters_001/main/antitypo.txt",
"Actually Legitimate URL Shortener Tool":"https://raw.githubusercontent.com/DandelionSprout/adfilt/master/LegitimateURLShortener.txt"
}
donelines = []
donedomains = []
excludes = requests.get("https://raw.githubusercontent.com/iam-py-test/allowlist/main/filter.txt").text.split("\n")
subdomains = requests.get("https://raw.githubusercontent.com/iam-py-test/tracker_analytics/main/kdl.txt").text.split("\n")
subdomains += requests.get("https://raw.githubusercontent.com/iam-py-test/my_filters_001/main/Alternative%20list%20formats/antimalware_domains.txt").text.split("\n")
subdomains += requests.get("https://raw.githubusercontent.com/iam-py-test/cloudflare-usage/main/cnames.txt").text.split("\n")
dead = requests.get("https://raw.githubusercontent.com/iam-py-test/my_filters_001/refs/heads/main/dead.mwbcheck.txt").text.split("\n")
# https://www.geeksforgeeks.org/how-to-validate-an-ip-address-using-regex/
is_ip_v4 = r"^((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])$"
is_ip_v6 = r"((([0-9a-fA-F]){1,4})\:){7}([0-9a-fA-F]){1,4}"
is_ip_v4_reg = re.compile(is_ip_v4)
is_ip_v6_reg = re.compile(is_ip_v6)
def isipdomain(domain):
    if re.search(is_ip_v4_reg, domain):
        return True
    if re.search(is_ip_v6_reg, domain):
        return True
    return False
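# Example: isipdomain("192.168.0.1") -> True, isipdomain("example.com") -> False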
def extdomain(line):
    # Extract the bare domain from an adblock-style network rule such as
    # "||example.com^" or "||example.com^$all"; return "" if none is found
    try:
        domain = ""
        if line.startswith("||") and line.endswith("^$all"):
            domain = line[2:-5]
        if line.startswith("||") and line.endswith("^$doc"):
            domain = line[2:-5]
        if line.startswith("||") and line.endswith("^$document"):
            domain = line[2:-10]
        if line.startswith("||") and line.endswith("^$3p"):
            domain = line[2:-4]
        elif line.startswith("||") and line.endswith("^$all,~inline-font,~inline-script"):
            domain = line[2:-33]
        elif line.startswith("||") and line.endswith("^"):
            domain = line[2:-1]
        elif line.startswith("||") and line.endswith("^$all,~inline-font"):
            domain = line[2:-18]
        elif line.startswith("||") and line.endswith("^$doc,popup"):
            domain = line[2:-11]
        elif line.startswith("||") and line.endswith("^$all,~inline-script"):
            domain = line[2:-20]
        return domain
    except Exception:
        return ""
mainlist = """! Title: iam-py-test's Combo List
! Expires: 1 day
! Script last updated: 22/11/2023
! Last updated: {}
! Homepage: https://github.com/iam-py-test/uBlock-combo
! the Python script and my two lists are under CC0
! for Dandelion Sprout's Anti-Malware List and The Actually Legitimate URL Shortener Tool, see https://github.com/DandelionSprout/adfilt/blob/master/LICENSE.md
""".format(datetime.date.today().strftime("%d/%m/%Y"))
eadd = 0
ered = 0
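# parselist() turns one downloaded filter list into text for the combined list,
# skipping comments, blanks, duplicates, allowlisted lines and dead domains,
# and recursing into !#include directives resolved against the list's URL.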
def parselist(l, curl=""):
    global donedomains
    global donelines
    global eadd
    global ered
    plist = ""
    for line in l:
        edomain = extdomain(line)
        if (line.startswith("!") or line.startswith("#")) and "include" not in line:
            continue
        elif line.startswith("[Adblock") and line.endswith("]"):
            continue
        elif line in donelines:
            ered += 1
        elif line in excludes:
            continue
        elif line == "":
            continue
        elif edomain != "" and edomain in donedomains:
            continue
        elif edomain in dead and edomain != "" and edomain is not None:
            continue
        elif line.startswith("!#include "):
            try:
                incpath = urllib.parse.urljoin(curl, line[10:], allow_fragments=True)
                inccontents = requests.get(incpath).text.replace("\r", "").split("\n")
                endcontents = parselist(inccontents, incpath)
                plist += "{}\n".format(endcontents)
            except Exception as err:
                print(line, err)
        else:
            plist += "{}\n".format(line)
            eadd += 1
            donelines.append(line)
            if edomain != "" and edomain != " ":
                donedomains.append(edomain)
    return plist
for clist in lists:
    l = requests.get(lists[clist]).text.split("\n")
    mainlist += parselist(l, lists[clist])

with open(LIST_FILENAME, "w", encoding="UTF-8") as f:
    f.write(mainlist)
justdomains = []
for d in donedomains:
    if "/" not in d and "." in d and "*" not in d and d != "" and not d.endswith(".") and not isipdomain(d):
        justdomains.append(d)

with open(DOMAIN_FILENAME, "w", encoding="UTF-8") as f:
    f.write("\n".join(justdomains))
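# Expand the domain list with known subdomains whose registrable (private
# suffix) domain is already blocked, using publicsuffixlist to find it.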
subsfound = 0
# copy so justdomains (already written above) keeps only the base domains
domainplussub = justdomains.copy()
for sub in subdomains:
    try:
        maindomain = psl.privatesuffix(sub)
        if maindomain in domainplussub and sub not in domainplussub:
            subsfound += 1
            domainplussub.append(sub)
    except Exception as err:
        print(err, sub)

with open("domains_subdomains.txt", "w", encoding="UTF-8") as f:
    f.write("\n".join(domainplussub))
with open(STATUS_FILENAME, "w") as status:
    status.write("""Stats:
{} entries added
{} redundant entries removed
{} domains added
{} subdomains added
""".format(eadd, ered, len(donedomains), subsfound))