forked from binary-person/womginx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnginx.conf
327 lines (293 loc) · 17.1 KB
/
nginx.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# Worker processes run as user "nginx", group "nginx".
user nginx nginx;
# Let nginx choose the number of worker processes itself.
worker_processes auto;
pid /run/nginx.pid;
# Pull in whichever module configuration snippets are enabled on this machine.
include /etc/nginx/modules-enabled/*.conf;

events {
    # Maximum number of simultaneous connections per worker process.
    worker_connections 1024;
}
http {
# file-extension -> Content-Type table
include /etc/nginx/mime.types;
include /etc/nginx/conf.d/*; # add any additional/custom server blocks
# bucket size of the hash tables that back the map blocks in this file
map_hash_bucket_size 128;
# DNS server used to resolve destination hosts at request time
# (needed because every proxy_pass in this file is given a variable)
resolver 1.1.1.1;
# resolver 1.1.1.3; # <-- uncomment this and comment out the above to block malware and adult sites
# ddos protection
# drop clients that stall while sending the request body or the request headers
client_body_timeout 10s;
client_header_timeout 10s;
# give up on destination servers that do not accept a connection within 20 seconds
proxy_connect_timeout 20s;
# these are very lenient limits. each client address may average 30 requests per
# second, and up to 500 requests above that rate are served immediately (nodelay)
# before further requests are rejected with 429. there should be no reason why a
# client would sustain 60 requests per second for roughly 16.7 seconds
# (500 / (60 - 30)), which is what it takes to exhaust the burst
limit_req_zone $binary_remote_addr zone=limitreq:20m rate=30r/s;
limit_req zone=limitreq burst=500 nodelay;
limit_req_status 429;
# allow at most 50 simultaneous connections per client address
limit_conn_zone $binary_remote_addr zone=limitconn:20m;
limit_conn limitconn 50;
# Destination-host blacklist: $blacklist is 1 when $dest_host is listed below and 0
# otherwise. www.example.com is only a placeholder entry showing the syntax.
map $dest_host $blacklist {
    default            0;
    'www.example.com'  1;
}
# blacklist user agents
# $blacklist_useragent is 1 when the User-Agent header matches any of the case-insensitive
# patterns below, 0 otherwise.
# the following is a default list that simply blocks all bots. credit to https://stackoverflow.com/a/24820722
# (entries that were listed twice - Go!Zilla, Go-Ahead-Got-It, HMView, GrabNet - are kept
# only once; the set of matched user agents is unchanged)
map $http_user_agent $blacklist_useragent {
    default 0;
    ~*(google|bing|yandex|msnbot) 1;
    ~*(AltaVista|Googlebot|Slurp|BlackWidow|Bot|ChinaClaw|Custo|DISCo|Download|Demon|eCatch|EirGrabber|EmailSiphon|EmailWolf|SuperHTTP|Surfbot|WebWhacker) 1;
    ~*(Express|WebPictures|ExtractorPro|EyeNetIE|FlashGet|GetRight|GetWeb!|Go!Zilla|Go-Ahead-Got-It|GrabNet|Grafula|HMView) 1;
    ~*(rafula|HTTrack|Stripper|Sucker|Indy|InterGET|Ninja|JetCar|Spider|larbin|LeechFTP|Downloader|tool|Navroad|NearSite|NetAnts|tAkeOut|WWWOFFLE) 1;
    ~*(NetSpider|Vampire|NetZIP|Octopus|Offline|PageGrabber|Foto|pavuk|pcBrowser|RealDownload|ReGet|SiteSnagger|SmartDownload|SuperBot|WebSpider) 1;
    ~*(Teleport|VoidEYE|Collector|WebAuto|WebCopier|WebFetch|WebGo|WebLeacher|WebReaper|WebSauger|eXtractor|Quester|WebStripper|WebZIP|Wget|Widow|Zeus) 1;
    ~*(Twengabot|htmlparser|libwww|Python|perl|urllib|scan|Curl|email|PycURL|Pyth|PyQ|WebCollector|WebCopy|webcraw) 1;
}
# Scheme used for generated urls. When an X-Forwarded-Proto header is present (nginx
# sitting behind a front end such as heroku) its value wins; otherwise nginx's own
# $scheme is used.
map $http_x_forwarded_proto $relativescheme {
    ''       $scheme;
    default  $http_x_forwarded_proto;
}
# Value of the Connection header sent to the destination: "Upgrade" when the client sent
# an Upgrade header (websocket handshake), "close" when it did not.
map $http_upgrade $connection_upgrade {
    ''       close;
    default  Upgrade;
}
# Constant-source map used to define a computed variable:
# $proxy_prefix is "<scheme>://<host>" of this proxy as seen by the client.
map '' $proxy_prefix {
    default  $relativescheme://$host;
}
# Host name of the destination, taken from $targeturl (the named capture set by the
# /main locations). Empty when $targeturl is not an http(s) url.
# Only the host name is captured - a trailing ":port" is left out - because $dest_host is
# the key of the exact-match $blacklist map: capturing "host:port" would let a client
# sidestep a blacklisted host simply by appending a port (www.example.com:443).
# Bracketed IPv6 literals are captured whole, brackets included.
map $targeturl $dest_host {
    default '';
    ~^https?://(\[[^/\]]*\]|[^/:]+) $1;
}
# Upstream url for websocket requests, derived from $targeturl:
#   * the trailing "?womginx_ws_origin_header=..." parameter is dropped, e.g. from
#     wss://dest:12/blah?somestuff?womginx_ws_origin_header=https://example.com
#   * ws:// becomes http:// and wss:// becomes https://
# Anything that is not a ws(s) url is passed through unchanged.
# The entry with the origin parameter is listed first so that it is tried first.
map $targeturl $dest_fullwebsocketurl {
    default $targeturl;
    ~^ws(s)?://(.+)(\?womginx_ws_origin_header=.+)$ http$1://$2;
    ~^ws(s)?://(.+) http$1://$2;
}
# Destination "scheme://host" of the page that issued the request, read from the Referer
# header when that header points at a /main/ url of this proxy (an optional modifier
# segment such as /js_ is skipped). Empty when the Referer is missing or not a /main/ url.
map $http_referer $dest_referrerhost {
    default '';
    ~^https?://[^/]+/main(/[^_/]+_)?/(?<targeturl>https?://?[^/]+) $targeturl;
}
# Destination "scheme://host" taken straight from $request_uri, for places such as proxy
# cookie rewriting where no $targeturl variable is available. Accepts an optional modifier
# segment and a single-slash scheme separator ("https:/host"). Empty for non-/main/ requests.
map $request_uri $dest_hostwithscheme {
    default '';
    ~^/main(/[^_/]+_)?/(https?://?[^/]+) $2;
}
# header rewrites
# Referer sent to the destination: when the client's Referer is a /main/ url of this
# proxy, only the embedded destination url is forwarded; any other Referer is forwarded
# unchanged.
map $http_referer $rewrite_referer {
    default $http_referer;
    ~^https?://[^/]+/main(/[^_/]+_)?/(?<targeturl>.*) $targeturl;
}
# Origin header sent to the destination. The map key is the client's Origin header and the
# request uri joined by the literal ":womginxseparate:", so both can be inspected at once.
# A womginx_ws_origin_header parameter on a websocket request takes priority over the
# client's Origin header (see $dest_fullwebsocketurl for how the parameter is stripped).
map $http_origin:womginxseparate:$request_uri $rewrite_origin {
    # client sent an Origin header, no womginx_ws_origin_header: use the destination host
    default $dest_hostwithscheme;
    # womginx_ws_origin_header is present (with or without an Origin header): use its value
    ~^.+(\?womginx_ws_origin_header=(.+))$ $2;
    # neither an Origin header nor womginx_ws_origin_header: send no origin
    ~^:womginxseparate: '';
}
# wombat-handler.js merges the double slash inside womginx_ws_origin_header so that a
# slash-merging server in the middle does not close the websocket connection. Undo that
# here: "https:/host" becomes "https://host"; values that already have "//" are kept.
map $rewrite_origin $reslashed_origin {
    default $rewrite_origin;
    ~^(https?:/)([^/].*) $1/$2;
}
server {
# public host name of this proxy
server_name womginx.arph.org;
# serve both plain http and https
listen 80;
listen 443 ssl;
ssl_certificate /etc/letsencrypt/live/womginx.arph.org/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/womginx.arph.org/privkey.pem;
# keep "//" intact in request paths, since proxied urls embed "scheme://host"
merge_slashes off;
# send the destination host name (SNI) when connecting to https destinations
proxy_ssl_server_name on;
# every 404 produced by this server is handed to the @referrer-handler location
error_page 404 = @referrer-handler;
# reason for putting rewrites here is putting them in parent of nested locations will
# not apply to child locations
# use undecoded uri as location for proxying encoded urls
rewrite ^ $request_uri;
# convert /main/http:/google.com to /main/http://google.com internally without redirect
# because some services like repl.it force-merge double slashes
# (this is handled together with wombat-handler.js)
rewrite ^\/main(?<mod>\/[^\/_]+_)?(?<url_preslash>\/(?:http|ws)s?:\/)(?<url_postslash>[^\/].*) /main$mod$url_preslash/$url_postslash;
# convert /main/http://domain.com to /main/http://domain.com/
# convert /main/http://domain.com?a to /main/http://domain.com/?a
rewrite ^(/main/https?://[^/?]*)/?(.*) $1/$2;
# Set-Cookie value handed out by the static-file and /womginxaddcookie/ locations;
# the default /main/ location refuses clients that do not send this cookie back
set $womginx_cookie 'womginx_are_you_a_bot=no; Path=/; HttpOnly';
# Seamlessly add the womginx cookie to improve user experience. Cookies sometimes get unset by
# the proxy site server, so it is confusing when the 403 error pops up. Also, going to / to set the
# cookie wouldn't necessarily work, since the browser caches the page but doesn't actually set the
# required cookie.
# example url: /womginxaddcookie/timestamp_for_cache_busting/https://womginxserver/main/https://www.google.com
#
# Only the /main/... path of the embedded url is honoured, and the redirect always points
# back at this server ($proxy_prefix). Redirecting to the raw captured url would make this
# endpoint an open redirect to arbitrary third-party sites. "https?:/+" also accepts an
# embedded url whose "//" was merged into "/" by a server in the middle.
location ~^/womginxaddcookie/\d+/https?:/+[^/]+(?<addcookie_return_path>/main/.*) {
    add_header Set-Cookie $womginx_cookie;
    return 302 $proxy_prefix$addcookie_return_path;
}
#### static file requests handling ####
# Static files of the proxy front end. The pattern matches every uri that contains no
# "/.git" anywhere and does not begin with "/main/", so .git directories are never served
# and proxy requests are left to the /main locations. Each response also hands out the
# womginx cookie.
location ~^((?!/\.git|^/main/).)*$ {
    add_header 'Set-Cookie' $womginx_cookie;
    alias /home/binary/womginx/public$uri;
    # a missing file yields 404, which error_page hands to @referrer-handler
}
# Exactly "/": without a proxied-page referrer this is the landing page, so serve
# /index.html. With one, "/" was requested by a proxied page; answer 404 so that
# error_page passes the request on to @referrer-handler.
location = / {
    if ($dest_referrerhost = '') {
        rewrite ^ /index.html last;
    }
    return 404;
}
# Target of error_page 404. Catches root-relative links such as "/assets.png" requested by
# a proxied page and redirects them to the same path on the host named by the referrer.
location @referrer-handler {
    # no usable referrer: this really is a 404
    if ($dest_referrerhost = '') {
        return 404;
    }
    # otherwise redirect to the requested uri under the referrer's destination host
    return 302 $proxy_prefix/main/$dest_referrerhost$request_uri;
}
#### proxy requests handling ####
location /main {
# reason for including all proxy header modifications here is because apparently,
# using proxy_set_header in a child block automatically negates all parent proxy_set_header
# directives.
# drop the destination's Content-Security-Policy header so it cannot forbid inline scripts
proxy_hide_header Content-Security-Policy;
# force proxy to send non-compressed for sub_filter to work
proxy_set_header Accept-Encoding 'identity;q=1, *;q=0';
# prevent proxied servers' HSTS headers from affecting the client
proxy_hide_header Strict-Transport-Security;
# enable support for embedding proxy site in an iframe
proxy_hide_header X-Frame-Options;
# rewrite referer header (see the $rewrite_referer map)
proxy_set_header Referer $rewrite_referer;
# rewrite origin header (see the $rewrite_origin and $reslashed_origin maps)
proxy_set_header Origin $reslashed_origin;
# rewrite every cookie domain to this proxy's host
proxy_cookie_domain ~(.*) $host;
# rewrite cookie path: prefix it with /main/<destination scheme://host>
proxy_cookie_path ~(/.*) /main/$dest_hostwithscheme$1;
# websocket headers
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
# fix "upstream sent too big header/body"
proxy_buffer_size 16k;
# proxy_buffer_size + 8k
proxy_busy_buffers_size 24k;
# numOfBuffers * bufferSize >= proxy_buffer_size
proxy_buffers 4 16k;
# client can only upload files less than 100M
client_max_body_size 100M;
# Protocol-relative targets: /main///host/path (optionally with a modifier segment) is
# rewritten internally, without a redirect, to /main/<client scheme>://host/path.
location ~^/main(/[^_/]+_)?///(?<protocol_relative_url>.+) {
    rewrite ^/main(/[^_/]+_)?///(?<protocol_relative_url>.+) /main$1/$relativescheme://$protocol_relative_url last;
}
# Targets without a scheme: /main/google.com is redirected to /main/https://google.com
# (using the client's scheme). A modifier segment, when present, is kept. The negative
# lookahead excludes targets that already start with http(s)://, ws(s):// or "//".
location ~^/main(/[^/_]+_)?(?!(/[^/_]+_)?/(http|ws)s?://|//)/(?<rewrite_url_with_scheme>.*) {
    return 302 $proxy_prefix/main$1/$relativescheme://$rewrite_url_with_scheme;
}
# Websocket connections (modifier "ws_", optionally preceded by digits). The upstream url
# comes from $dest_fullwebsocketurl, which turns ws(s):// into http(s):// and strips the
# womginx_ws_origin_header parameter. HTTP/1.1 is set for the upstream connection.
location ~^/main/\d*ws_/(?<targeturl>.+) {
    proxy_http_version 1.1;
    proxy_pass $dest_fullwebsocketurl;
}
# serve assets (modifiers "im_" and "oe_", optionally preceded by digits) unmodified
location ~^/main/\d*(im|oe)_/(?<targeturl>.+) {
    # enforce the domain blacklist here as well: without this check a blacklisted
    # host stays reachable simply by requesting it through a modifier prefix
    if ($blacklist) {
        return 403 'Sorry. The owner of this proxy decided to disallow access to this content.';
    }
    proxy_pass $targeturl;
}
# serve web and service workers (modifiers "wkr_" and "sw_", optionally preceded by
# digits). rewrites are done client side
location ~^/main/\d*(wkr|sw)_/(?<targeturl>.+) {
    # enforce the domain blacklist here as well: without this check a blacklisted
    # host stays reachable simply by requesting it through a modifier prefix
    if ($blacklist) {
        return 403 'Sorry. The owner of this proxy decided to disallow access to this content.';
    }
    proxy_pass $targeturl;
}
# rewrite js files (modifier "js_", optionally preceded by digits): postMessage calls and
# window.location references are replaced, on every occurrence and for every content type
location ~^/main/\d*js_/(?<targeturl>.+) {
    # enforce the domain blacklist here as well: without this check a blacklisted
    # host stays reachable simply by requesting it through a modifier prefix
    if ($blacklist) {
        return 403 'Sorry. The owner of this proxy decided to disallow access to this content.';
    }
    sub_filter_once off;
    sub_filter_types *;
    sub_filter '.postMessage(' '.__WB_pmw(self.window).postMessage(';
    sub_filter '.postMessage (' '.__WB_pmw(self.window).postMessage (';
    sub_filter 'window.location' 'window.currentLocation';
    proxy_pass $targeturl;
}
# Any modifier not handled by an earlier location: redirect
# /main/unsupportedmod_/blahblah to /main/blahblah, dropping the modifier segment.
location ~^/main/[^_/]+_/(?<redirecttargeturl>.+) {
    return 302 $proxy_prefix/main/$redirecttargeturl;
}
# default proxy location: every /main/ request not taken by one of the locations above.
# it applies the host blacklist, the user-agent blacklist and the cookie check, then proxies
# $targeturl while rewriting redirects and script/src urls in the returned html.
location ~^/main/(?<targeturl>.*)$ {
# blacklist urls
if ($blacklist) {
return 403 'Sorry. The owner of this proxy decided to disallow access to this content.';
}
# blacklist user agents
if ($blacklist_useragent) {
return 403 'Sorry, you are not allowed to access this content.';
}
# deny access to anyone without the special cookie, but redirect them appropriately to set it:
# the returned script sends the browser to /womginxaddcookie/<timestamp>/<current url>
# NOTE(review): add_header is used to get an html Content-Type on the 403 body; this may be
# sent in addition to the default Content-Type of the response - confirm
if ($cookie_womginx_are_you_a_bot != 'no') {
add_header Content-Type text/html always;
return 403 '<script>location.href="/womginxaddcookie/"+Date.now()+"/"+location.href</script>';
}
# handle proxy sending redirection requests: absolute http(s) Location headers are
# pointed back at this proxy
proxy_redirect ~^(https?://.+) $proxy_prefix/main/$1;
# replace every occurrence, not just the first one
sub_filter_once off;
# sub_filter_types text/html; already text/html by default but leaving it here for clarity
# and commenting it out to suppress duplicate warnings
# wrap href,src,poster,action,srcset,data,codebase links.
# and add a womginx-processed attribute to the tag to notify client js it is already handled.
# since we're not using subs_filter module for simplicity of installation, we are going to
# list every possible combination. However, we only need to do src and script src tags
# because they cannot be handled by wombat, and for the rest of the attributes, we can
# let the client handle it for offloading the CPU load to the client and maintaining
# the integrity of the html document (so we reduce chances of replacing an attribute at a
# wrong place)
set $processed_flag_attribute 'womginx-processed';
# 1. change 'src="//domain/image.png' to 'womginx-processed src="/main/$relativescheme://domain/image.png'
# for scripts, serve under /main/js_/http://domain.com/script.js to inject wombat code
sub_filter '<script src="//' '<script $processed_flag_attribute src="/main/js_/$relativescheme://';
sub_filter '<script type="text/javascript" src="//' '<script type="text/javascript" $processed_flag_attribute src="/main/js_/$relativescheme://';
sub_filter 'src="//' '$processed_flag_attribute src="/main/$relativescheme://';
# 2. change 'src="/image.png' to 'womginx-processed src="/main/$dest_hostwithscheme/image.png'
sub_filter '<script src="/' '<script $processed_flag_attribute src="/main/js_/$dest_hostwithscheme/';
sub_filter '<script type="text/javascript" src="/' '<script type="text/javascript" $processed_flag_attribute src="/main/js_/$dest_hostwithscheme/';
sub_filter 'src="/' '$processed_flag_attribute src="/main/$dest_hostwithscheme/';
# 3. change 'src="https://domain.com/image.png"' to 'womginx-processed src="/main/https://domain.com/image.png"'
sub_filter '<script src="https://' '<script $processed_flag_attribute src="/main/js_/https://';
sub_filter '<script type="text/javascript" src="https://' '<script type="text/javascript" $processed_flag_attribute src="/main/js_/https://';
sub_filter 'src="https://' '$processed_flag_attribute src="/main/https://';
# 4. same as above but with http:// instead of https://
sub_filter '<script src="http://' '<script $processed_flag_attribute src="/main/js_/http://';
sub_filter '<script type="text/javascript" src="http://' '<script type="text/javascript" $processed_flag_attribute src="/main/js_/http://';
sub_filter 'src="http://' '$processed_flag_attribute src="/main/http://';
# 5. rewrite 'integrity="' to 'nointegrity="' to disable script integrity checking
sub_filter 'integrity="' 'nointegrity="';
# 6. disable meta tag's Content-Security-Policy
sub_filter 'http-equiv="Content-Security-Policy"' 'http-equiv="No-U-Content-Security-Policy"';
# do the same thing for attributes quoted with ' instead of "
# 1. protocol-relative src
sub_filter "<script src='//" "<script $processed_flag_attribute src='/main/js_/$relativescheme://";
sub_filter "<script type='text/javascript' src='//" "<script type='text/javascript' $processed_flag_attribute src='/main/js_/$relativescheme://";
sub_filter "src='//" "$processed_flag_attribute src='/main/$relativescheme://";
# 2. root-relative src
sub_filter "<script src='/" "<script $processed_flag_attribute src='/main/js_/$dest_hostwithscheme/";
sub_filter "<script type='text/javascript' src='/" "<script type='text/javascript' $processed_flag_attribute src='/main/js_/$dest_hostwithscheme/";
sub_filter "src='/" "$processed_flag_attribute src='/main/$dest_hostwithscheme/";
# 3. absolute https:// src
sub_filter "<script src='https://" "<script $processed_flag_attribute src='/main/js_/https://";
sub_filter "<script type='text/javascript' src='https://" "<script type='text/javascript' $processed_flag_attribute src='/main/js_/https://";
sub_filter "src='https://" "$processed_flag_attribute src='/main/https://";
# 4. absolute http:// src
sub_filter "<script src='http://" "<script $processed_flag_attribute src='/main/js_/http://";
sub_filter "<script type='text/javascript' src='http://" "<script type='text/javascript' $processed_flag_attribute src='/main/js_/http://";
sub_filter "src='http://" "$processed_flag_attribute src='/main/http://";
# 5. disable script integrity checking
sub_filter "integrity='" "nointegrity='";
# 6. disable meta tag's Content-Security-Policy
sub_filter "http-equiv='Content-Security-Policy'" "http-equiv='No-U-Content-Security-Policy'";
# insert wombat.js and wombat-handler.js scripts right after the opening <head> tag
# (the replacement string spans several lines; its line breaks are part of the value)
sub_filter '<head>' '<head>
<script $processed_flag_attribute src="/wombat/dist/wombat.js"></script>
<script $processed_flag_attribute src="/wombat-handler.js" processed-attribute="$processed_flag_attribute">
</script>';
# proxy to the destination url captured by the location regex
proxy_pass $targeturl;
}
}
}
}