-
Notifications
You must be signed in to change notification settings - Fork 1
/
url-check-config.json
148 lines (148 loc) · 8.31 KB
/
url-check-config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
{
"repositories" : {
"standard.publiccode.net": {
"url": "https://github.com/publiccodenet/standard.git",
"branch": "main"
},
"standard.publiccode.net-develop": {
"url": "https://github.com/publiccodenet/standard.git",
"branch": "develop"
},
"publiccode.net": {
"url": "https://github.com/publiccodenet/publiccode.net.git",
"branch": "main"
},
"about.publiccode.net": {
"url": "https://github.com/publiccodenet/about.git",
"branch": "main"
},
"blog.publiccode.net": {
"url": "https://github.com/publiccodenet/blog.git",
"branch": "main"
},
"bumperscripter": {
"url": "https://github.com/publiccodenet/bumperscripter.git",
"branch": "main"
}
},
"ignore_patterns" : {
"\\[subdomain\\]\\.publiccode\\.net": "template",
"FILE_BASE}.html": "template",
"http[s]\\?://archive\\.org/web/": "often times out",
"http[s]\\?://web\\.archive\\.org/web/": "often times out",
"http[s]\\?://twitter\\.com": "302; does not serve scripts",
"http[s]\\?://linkedin\\.com": "302; does not serve scripts",
"http[s]\\?://www\\.linkedin\\.com": "999; does not serve scripts",
"http[s]\\?://chat\\.openai\\.com": "302; does not serve scripts",
"https://github.com/org_name/codebase_name.git": "bogus example URL",
"http[s]\\?://github\\.com/.*/edit/": "may point to yet-to-exist page",
"http[s]\\?://docs\\.github\\.com/": "seems blocked as DoS protection",
"http[s]\\?://github\\.com/[-0-9A-Za-z_\\./]\\+/\\(issues\\|pull\\)/[0-9]\\+": "ignore github issues and PRs",
"plausible\\.io/js/plausible\\.js": "does not serve to scripts",
"https://github.com/publiccodenet/standard/compare/main...release": "example URL in docs/releasing",
"opensource\\.org": "failed: 503 No error",
"belastingdienst\\.nl/": "regular timeouts",
"reclameland\\.nl/drukken": "failed: 403 No error",
"https://help.miro.com": "403 to script",
"www\\.dta\\.gov\\.au/help-and-advice": "failed: 403 No error",
"https://pixabay\\.com/": "gives 403 to curl",
"https://fonts.google.com/download?family=": "bash param in the URL",
"https://standard.publiccode.net/criteria/\\\\2.html": "regex in URL",
"https://www.go-fair.org/": "gives 400s when run as GitHub workflow",
"https://support\\.google\\.com/": "gives 404 to curl",
"https://www\\.komoot\\.com/": "gives 404 to curl, works in browser",
"https://www\\.grammarly\\.com/": "HTTP/2 405, allow: POST, GET",
"https://giphy\\.com": "gives 503 to curl",
"https://www\\.lonebeard\\.com": "defunct, referenced in binary files",
"http[s]\\?://cipa\\.jp/exif": "defunct, embedded in some .jpg files",
"http://ns\\.adobe\\.com/": "defunct, embedded in .jpg",
"http://www\\.gimp\\.org/xmp/": "defunct, embedded in .jpg",
"http://www\\.inkscape\\.org/namespaces/inkscape": "defunct, in .svg",
"http[s]\\?://sodipodi\\.sourceforge\\.net/DTD/sodipodi-0\\.dtd": "defunct, in SVGs",
"http[s]\\?://www\\.omg\\.org/spec/.*/20100524": "defunct, embedded in old .bpmn files",
"http[s]\\?://bpmn.io/schema/bpmn": "unreliable",
"http[s]\\?://www\\.un\\.org/en/content/": "frequent timeout",
"http[s]\\?://arkitektur\\.digst\\.dk/node/1173": "times out",
"http[s]\\?://eur-lex\\.europa\\.eu/legal-content/EN/TXT": "timeouts",
"https://www\\.uwv\\.nl": "gives 404 to curl",
"listennotes\\.com/": "frequent timeouts",
"lists\\.publiccode\\.net/mailman/": "frequent timeouts",
"https://wetten\\.overheid\\.nl": "times out",
"amsterdam\\.nl/en/": "frequent timeouts",
"iso\\.org/drafting-standards\\.html": "timeouts",
"https://flickr.com/e/tFzM3d9XsB": "defunct, embedded in a .jpg",
"http://www\\.instagram\\.com/lottedale": "429, embedded in .jpg",
"https://mijnwerkgeversportaal\\.acumen\\.nl/": "timeouts",
"https://groups\\.google\\.com/./publiccode\\.net/g": "requires login",
"https://support\\.streamyard\\.com/": "gives 403 to curl",
"http[s]\\?://www\\.figma\\.com": "gives 404 to curl"
},
"transforms" : {
"sed 's@)\\](http@\\nhttp@g'":
"split double-urls",
"sed 's@\\](http@\\nhttp@g'":
"split double-urls",
"sed 's@/[\\.,):\\!\\?\\*\u2019]*$@/@g'":
"remove trailing punctuation from links ending in '/'",
"sed 's@\\.net[\\.,):\\!]*$@.net@g'":
"remove trailing punctuation from links ending in '.net'",
"sed 's@\\.com[\\.,):\\!]*$@.com@g'":
"remove trailing punctuation from links ending in '.com'",
"sed 's@\\.org[\\.,):\\!]*$@.org@g'":
"remove trailing punctuation from links ending in '.org'",
"sed 's@\\.html[\\.,):\\!]*$@.html@g'":
"remove trailing punctuation from links ending in '.html'",
"sed 's@\\.json[\\.,):\\!]*$@.json@g'":
"remove trailing punctuation from links ending in '.json'",
"sed 's@^\\(http.*\\.pdf\\)[\\.,):\\!]*$@\\1@g'":
"remove trailing punctuation from links ending in '.pdf'",
"sed 's@Open_air_school).$@Open_air_school@g'":
"remove trailing punctuation",
"sed 's@\\(nextcloud/index.php/s/[-0-9a-zA-Z]*\\)[\\.,):\\!]*$@\\1@g'":
"remove trailing punctuation from nextcloud files",
"sed 's@poortwachter[\\.,):\\!]*$@poortwachter@g'":
"remove trailing punctuation",
"sed 's@\\(://tools\\.ietf\\.org/html/rfc[0-9]*\\)[\\.,):\\!]*$@\\1@g'":
"remove trailing punctuation",
"sed -r 's@(http://hintjens\\.com/blog:[0-9]+)[^0-9]+.*@\\1@g'":
"remove anchors, trailing punctuation, parameters",
"sed -r 's@(http[s]\\?://hackmd\\.io/[^#]+)#.*@\\1@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@)/\\[.*@@g'":
"remove trailing punctuation, and following text",
"sed 's@\\(https://youtu\\.be/[-A-Za-z0-9_]*\\).*@\\1@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@\\(https://www\\.youtube\\.com/watch?v=[-A-Za-z0-9_]*\\).*@\\1@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@\\(https://youtube\\.com/watch?v=[-A-Za-z0-9_]*\\).*@\\1@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@publiccode\\.net/organization/staff).*@publiccode.net/organization/staff@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@publiccode\\.net/logo/mark\\.svg.*@publiccode.net/logo/mark.svg@'":
"remove anchors, trailing punctuation, parameters",
"sed 's@Open_air_school)\\?@Open_air_school@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@Frontend)!$@Frontend@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@alliance)!$@alliance@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@export_processing)\\.$@export_processing@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@say)\\*\\*$@say@g'":
"remove anchors, trailing punctuation, parameters",
"sed 's@\\(bmj\\.com/[-a-zA-Z0-9/\\.]*\\)[)\\?]*@\\1@g'":
"remove trailing punctuation",
"sed 's@edit)\\.\\*\\*$@edit@g'":
"remove trailing punctuation",
"sed 's@\\(oeffentliche-it\\.de/[-a-zA-Z0-9/%\\.\\+]*\\)[)\\]]*$@\\1@g'":
"remove trailing punctuation",
"sed 's@\\(github\\.com/[-a-zA-Z0-9/%\\.\\+]*\\)[)\\]]*$@\\1@g'":
"remove trailing punctuation",
"sed 's@\\(reclameland\\.nl/[-a-zA-Z0-9/%\\.\\+]*\\)[\\.)\\],:!\\?]*$@\\1@g'":
"remove trailing punctuation",
"sed 's@\\(/publiccodenet/[-_a-zA-Z0-9/\\.\\+]*\\)[]\\._),:!\\?]*$@\\1@g'":
"remove trailing punctuation",
"sed 's@\\(publiccode\\.net/[-_a-zA-Z0-9/\\.\\+]*\\)[]\\._),:!\\?]*$@\\1@g'":
"remove trailing punctuation"
}
}