-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpernosco
executable file
·430 lines (366 loc) · 18.2 KB
/
pernosco
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
#!/usr/bin/env python3
import argparse
import base64
import enum
import glob
import http.server
import json
import logging
import os
import random
import re
import shutil
import subprocess
import sys
import threading
import time
from typing import Dict, List, NewType, Optional, Tuple
import watchdog
from watchdog import events, observers
import pernoscoshared.base as base
# Distinct static types for the different kinds of strings this script
# passes around, so a type checker can tell them apart.
ContainerId = NewType("ContainerId", str)
TraceId = NewType("TraceId", str)
ImageName = NewType("ImageName", str)
# Setup Pernosco style logging output.
# Static types for the process and thread ids attached to log records.
Pid = NewType('Pid', int)
Tid = NewType('Tid', int)
# Thread-local holder for the (pid, tid) pair computed by pid_tid().
pid_tid_storage = threading.local()
def pid_tid() -> Tuple[Pid, Tid]:
    """Return the (process id, native thread id) pair for the calling thread.

    The pair is computed on the first call made from each thread and kept in
    the thread-local ``pid_tid_storage``; later calls from the same thread
    return the stored value.
    """
    already_stored = getattr(pid_tid_storage, 'initialized', None)
    if already_stored:
        return pid_tid_storage.value
    process_id = os.getpid()
    thread_id = threading.get_native_id()
    pid_tid_storage.value = (process_id, thread_id)
    pid_tid_storage.initialized = True
    return pid_tid_storage.value
def pid_tid_filter(record):
    """Logging filter that stamps ``pid`` and ``tid`` onto *record*.

    These attributes are what the ``%(pid)d.%(tid)d`` fields of the log
    format refer to. The record itself is returned.
    """
    record.pid, record.tid = pid_tid()
    return record
# Send all log output through a single stream handler whose filter adds the
# `pid` and `tid` attributes that the format string below refers to.
pid_tid_handler = logging.StreamHandler()
pid_tid_handler.addFilter(pid_tid_filter)
logging.basicConfig(level=logging.INFO,
                    handlers=[pid_tid_handler],
                    format='%(levelname)s:%(name)s:%(pid)d.%(tid)d:%(created)10.7f: %(message)s')
process_setup_logger = logging.getLogger("process_setup")
# Record the full command line this process was started with.
process_setup_logger.info("Process started: %s"%" ".join(sys.argv))
# Seed the random module; start_container() uses it to build a random
# suffix for container names.
random.seed()
# Now start actually doing stuff.
# Command-line interface: a set of global options plus a `run` subcommand.
# The built-in help is disabled (add_help=False) and re-added by hand below
# so that -h/--help is listed in the "Global Options" group.
arg_parser = argparse.ArgumentParser(add_help=False)
arg_subparsers = arg_parser.add_subparsers(dest='subcommand', title='Subcommands')
global_opts_group = arg_parser.add_argument_group("Global Options")
global_opts_group.add_argument("-h", "--help", action='help', help="Show this help message and exit")
global_opts_group.add_argument("-x", dest='echo_commands', action='store_true', help="Echo spawned command lines")
global_opts_group.add_argument("--log", help="(debug|info|warn|error) sets logging to the given level.")
global_opts_group.add_argument("--container-runtime", help="Set docker or podman as the runtime (use podman at your own risk!)", default="docker")
global_opts_group.add_argument("--gcloud", action='store_true', help="Use containers from Google Cloud Artifact Registry instead of AWS")
# Options of the `run` subcommand. NOTE: these attributes (port, storage,
# url_root, notify_*, dir) are only present on `args` when `run` was given.
run_subparser = arg_subparsers.add_parser("run", help="Run the lobby-server.")
run_subparser.add_argument("-p", "--port", type=int, default=8080, help="The port to serve Pernosco on. (default to 8080 if unspecified)")
run_subparser.add_argument("--storage", help="Use this directory to store persistent data (i.e. notebook data)")
run_subparser.add_argument("--url-root", help="The outwardly-visible URL for this server (e.g. https://pernos.co)")
run_subparser.add_argument("--notify-gcp-pubsub-topic", help="The GCP Pub/Sub to send notifications to")
run_subparser.add_argument("--notify-on-startup", action='store_true', help="Send a notification on startup")
run_subparser.add_argument("dir", help="The directory in which rr traces can be found")
# Parsing happens at import time; everything below reads the result.
args = arg_parser.parse_args()
# Apply the global options parsed above.
base.echo_commands = args.echo_commands
if args.log:
    # The level is given by name; it is upper-cased and set on the root logger.
    logging.getLogger(None).setLevel(args.log.upper())
# Choose the registry the container images come from. The static asset host
# is the same in both cases.
if args.gcloud:
    REPOSITORY_REGION = 'us-west1'
    REPOSITORY_HOST = '%s-docker.pkg.dev'%REPOSITORY_REGION
    STATIC_HOST = 'http://pernosco-statics.s3-website-us-west-1.amazonaws.com'
    SERVER_REPOSITORY = '%s/pernosco/customers/app-server'%REPOSITORY_HOST
    REVERSE_PROXY_REPOSITORY = '%s/pernosco/customers/reverse-proxy'%REPOSITORY_HOST
else:
    REPOSITORY_REGION = 'us-east-2'
    REPOSITORY_HOST = '643334553517.dkr.ecr.%s.amazonaws.com'%REPOSITORY_REGION
    STATIC_HOST = 'http://pernosco-statics.s3-website-us-west-1.amazonaws.com'
    SERVER_REPOSITORY = '%s/customers/app-server'%REPOSITORY_HOST
    REVERSE_PROXY_REPOSITORY = '%s/customers/reverse-proxy'%REPOSITORY_HOST
# The revision is used as the tag of both container images and as a path
# component of the static asset prefix.
metadata = {
    'revision': '783fc15c075c8223620519c68fb47686a43e0c73',
}
SERVER_IMAGE = ImageName("%s:%s"%(SERVER_REPOSITORY, metadata['revision']))
REVERSE_PROXY_IMAGE = ImageName("%s:%s"%(REVERSE_PROXY_REPOSITORY, metadata['revision']))
STATIC_PREFIX = "%s/client/%s"%(STATIC_HOST, metadata['revision'])
def start_container(image: ImageName, name: str, params: List[str], detach: bool = True,
                    needs_all_syscalls: bool = False, network_mode_host: bool = False,
                    env: Optional[Dict[str, str]] = None,
                    read_write_mounts: Optional[List[Tuple[str, str]]] = None,
                    read_only_mounts: Optional[List[Tuple[str, str]]] = None,
                    labels: Optional[List[Tuple[str, str]]] = None) -> ContainerId:
    """Start a container with the configured runtime and return its output as an id.

    Args:
        image: Image to run.
        name: Prefix of the container name; a random hex suffix is appended.
        params: Arguments passed to the container after the image name.
        detach: Pass ``--detach`` to the runtime.
        needs_all_syscalls: Disable the seccomp and apparmor confinements.
        network_mode_host: Run the container with ``--network host``.
        env: Extra environment variables, in addition to ``RUST_BACKTRACE=full``.
        read_write_mounts: ``(src, dst)`` bind mounts, writable.
        read_only_mounts: ``(src, dst)`` bind mounts, read-only.
        labels: ``(key, value)`` container labels.

    Returns:
        The decoded, whitespace-stripped output of the runtime's ``run``
        command. With ``detach=True`` this is presumably the container id
        (that is what ``docker run --detach`` prints) -- confirm for podman.

    The collection parameters default to ``None`` rather than to shared
    mutable objects; ``None`` means "none given".
    """
    cmd = [args.container_runtime, 'run', '--name',
           "%s-%s"%(name, hex(random.randrange(pow(2,64)))[2:]), '--tmpfs', '/tmp',
           '--env', 'RUST_BACKTRACE=full', '--init']
    if detach:
        cmd.append('--detach')
    if needs_all_syscalls:
        # app server containers need exotic kernel features like ptrace
        # and user namespaces, so these confinements need to be disabled.
        cmd.extend(['--security-opt', 'seccomp=unconfined',
                    '--security-opt', 'apparmor=unconfined'])
    if network_mode_host:
        cmd.extend(['--network', 'host'])
    for key, value in (env or {}).items():
        cmd.extend(['--env', "%s=%s"%(key, value)])
    for mount in read_write_mounts or []:
        cmd.extend(['--mount', 'type=bind,bind-propagation=rslave,src=%s,dst=%s'%mount])
    for mount in read_only_mounts or []:
        cmd.extend(['--mount', 'type=bind,bind-propagation=rslave,readonly,src=%s,dst=%s'%mount])
    for label in labels or []:
        cmd.extend(['--label', '%s=%s'%label])
    cmd.append(image)
    cmd.extend(params)
    # Avoid using 'encoding' parameter to check_output because it was only
    # added in Python 3.6.
    return ContainerId(base.check_output(cmd).decode().strip())
def stop_container(container_id: ContainerId) -> None:
    """Ask the configured container runtime to stop *container_id*.

    The command's standard output is discarded. It is run through
    ``base.call`` (not ``base.check_call``), so a failing stop is presumably
    tolerated -- confirm against ``pernoscoshared.base``.
    """
    stop_cmd = [args.container_runtime, 'stop', container_id]
    base.call(stop_cmd, stdout=subprocess.DEVNULL)
def cleanup_container(container_id: ContainerId) -> None:
    """Force-remove *container_id* together with its volumes.

    Runs ``rm --force --volumes`` with the configured container runtime via
    ``base.check_call``; the command's standard output is discarded.
    """
    remove_cmd = [args.container_runtime, 'rm', '--force', '--volumes', container_id]
    base.check_call(remove_cmd, stdout=subprocess.DEVNULL)
# Base URL under which traces are advertised: the --url-root value when
# given, otherwise the local address on the serving port.
#
# `port` and `url_root` are options of the `run` subcommand and are absent
# from `args` when no subcommand was selected, so they are read defensively
# here. Accessing them directly would raise AttributeError at import time
# and the help text at the bottom of the script could never be printed.
url_root: str = "http://127.0.0.1:%d"%getattr(args, 'port', 8080)
if getattr(args, 'url_root', None) is not None:
    # Remove any trailing slash.
    url_root = args.url_root.removesuffix('/')
def start_appserver(uuid: TraceId, path: str, db_candidates: List[str],
                    extra_arguments: Optional[List[str]] = None,
                    read_write_mounts: Optional[List[Tuple[str, str]]] = None) -> ContainerId:
    """Start the app-server container for one trace and return its id.

    Args:
        uuid: Trace id; used in the container name, the public URL and the
            traefik router/service/middleware names.
        path: Trace directory, bind-mounted read-only at /pernosco/database.
        db_candidates: Currently unused by this function.
        extra_arguments: Additional app-server arguments, placed after
            ``--empty-umask``. Not modified.
        read_write_mounts: Additional writable ``(src, dst)`` bind mounts.

    The container is labelled so that requests under ``/debug/<uuid>/app``
    are routed to port 3000 of the container with that prefix stripped.
    """
    params = ['--empty-umask']
    params.extend(extra_arguments or [])
    if url_root is not None:
        params.extend(['--public-url', "%s/debug/%s/index.html"%(url_root,uuid)])
    read_only_mounts = [(path, "/pernosco/database")]
    env = {'PERNOSCO_ENABLE_VARIABLES_ANNOTATIONS': '0',
           'RUST_LOG': 'info'}
    labels = [
        ('traefik.http.services.app-server-%s.loadbalancer.server.port'%uuid, '3000'),
        ('traefik.http.routers.app-server-%s.service'%uuid, 'app-server-%s'%uuid),
        ('traefik.http.routers.app-server-%s.priority'%uuid, '1000'),
        ('traefik.http.routers.app-server-%s.rule'%uuid, 'PathPrefix(`/debug/%s/app`)'%uuid),
        ('traefik.http.routers.app-server-%s.middlewares'%uuid, 'strip-app-server-prefix-%s'%uuid),
        ('traefik.http.middlewares.strip-app-server-prefix-%s.stripprefix.prefixes'%uuid, '/debug/%s/app'%uuid),
    ]
    return start_container(SERVER_IMAGE, 'pernosco-%s'%uuid, params,
                           needs_all_syscalls=True, env=env,
                           read_only_mounts=read_only_mounts,
                           read_write_mounts=read_write_mounts or [],
                           labels=labels)
def process_one_dir(path: str, newly_added: bool = False,
                    extra_arguments: Optional[List[str]] = None,
                    read_write_mounts: Optional[List[Tuple[str, str]]] = None) -> Optional[Tuple[TraceId, ContainerId]]:
    """Start an app server for *path* if it looks like a Pernosco trace.

    Args:
        path: Candidate trace directory.
        newly_added: True when the directory appeared while the server was
            running; the notification arguments are only added in that case.
        extra_arguments: Base app-server arguments. The caller's list is
            never modified.
        read_write_mounts: Writable ``(src, dst)`` bind mounts for the
            app-server container.

    Returns:
        ``(trace id, container id)``, or ``None`` when *path* contains no
        ``db.*`` entry and is therefore ignored.
    """
    dir_provider_logger = logging.getLogger("dir_provider")
    # See if we can find a candidate database directory.
    db_candidates = glob.glob("db.*", root_dir=path)
    if not db_candidates:
        dir_provider_logger.debug("Ignoring directory %s with no db candidates"%path)
        return None

    # Since it looks like a Pernosco trace directory, go ahead and run
    # rr traceinfo on it.
    output = base.check_output(['rr', 'traceinfo', path]).decode('utf-8')
    trace_info = json.loads(output)
    # Convert the trace UUID to urlsafe base64, but strip padding.
    encoded_uuid = base64.urlsafe_b64encode(bytes(trace_info['uuid']))
    uuid = TraceId(encoded_uuid.rstrip(b'=').decode('utf-8'))

    # Work on a private copy: the caller passes the same list for every
    # trace, so extending it in place would pile the notification flags up
    # again each time a new trace is added.
    appserver_arguments = list(extra_arguments or [])
    if newly_added:
        print("Would notify gcp")
        if args.notify_gcp_pubsub_topic is not None:
            appserver_arguments.extend(['--notify-gcp-pubsub-topic', args.notify_gcp_pubsub_topic])
            if args.notify_on_startup:
                appserver_arguments.extend(['--notify-gcp-pubsub-on-startup'])

    container = start_appserver(uuid, path, db_candidates,
                                extra_arguments=appserver_arguments,
                                read_write_mounts=read_write_mounts or [])
    print("%s available at %s/debug/%s/index.html"%(path, url_root, uuid))
    return (uuid, container)
@enum.unique
class KnownHtml(enum.Enum):
    """The HTML pages the lobby server knows how to serve."""

    INDEX = 1
    SOURCE_VIEWER = 2

    def __str__(self):
        """Return the page's file name without the ``.html`` extension."""
        stems = {
            KnownHtml.INDEX: "index",
            KnownHtml.SOURCE_VIEWER: "source-viewer",
        }
        if self in stems:
            return stems[self]
        raise ValueError("huh?")
# Processed HTML pages, keyed by trace id and then by page kind.
known_html_cache: Dict[TraceId, Dict[KnownHtml, bytes]] = {}
# The auth HTTP server; None until the auth thread in run_command() has
# created it. Kept global so the main thread can shut it down.
auth_server: Optional[http.server.HTTPServer] = None
def get_known_html(known_html: KnownHtml, trace_id: TraceId,
                   containers: Dict[TraceId, ContainerId]) -> Optional[bytes]:
    """Return the processed HTML page *known_html* for trace *trace_id*.

    On a cache miss the page is read out of the trace's app-server container
    (``$PERNOSCO_ROOT/main/client/<page>.html``), its ``%PROTOCOL%`` and
    ``%STATIC_PREFIX%`` placeholders are substituted, and the result is
    stored in ``known_html_cache`` so later requests do not have to exec
    into the container again.

    Args:
        known_html: Which page to return.
        trace_id: Trace whose container provides the page.
        containers: Mapping from trace id to running container id.

    Returns:
        The page bytes, or ``None`` when the page is not cached and no
        container is known for *trace_id*.
    """
    cached = known_html_cache.get(trace_id, {}).get(known_html)
    if cached is not None:
        return cached

    # Unknown trace ids must yield None (a 404 for the caller), not KeyError.
    container_id = containers.get(trace_id)
    if container_id is None:
        return None

    page = base.check_output([args.container_runtime, 'exec', container_id, '/bin/bash', '-c', 'cat $PERNOSCO_ROOT/main/client/%s.html'%known_html], stdin=subprocess.DEVNULL)
    # XXXkhuey these should not be hardcoded.
    page = page.replace(b'%PROTOCOL%', b'2')
    page = page.replace(b'%STATIC_PREFIX%', STATIC_PREFIX.encode('utf-8'))
    # Store into the module-level cache itself; a dict obtained through
    # `.get(trace_id, {})` would be a throwaway and the page would be lost.
    known_html_cache.setdefault(trace_id, {})[known_html] = page
    return page
def apply_known_html_headers(known_html: KnownHtml, response: http.server.BaseHTTPRequestHandler):
    """Send the response headers that accompany a known HTML page.

    The source viewer may be framed by pages of the same origin; every other
    page refuses framing entirely. Only headers are sent: the caller is
    responsible for the status line before and ``end_headers()`` after.
    """
    may_be_framed = known_html == KnownHtml.SOURCE_VIEWER
    frame_ancestors = '\'self\'' if may_be_framed else '\'none\''
    frame_options = 'SAMEORIGIN' if may_be_framed else 'DENY'

    # Build our CSP.
    # XXXkhuey Hosted Pernosco contains a CSP workaround for
    # https://bugs.webkit.org/show_bug.cgi?id=201591. Do we need that here?
    csp = f"""
default-src 'self';
script-src 'self' 'sha256-ih8uGBHZhNa8RYX6VNKy3/buYTmJD3KJ+JyIRzUB4s4=' {STATIC_HOST};
connect-src 'self';
style-src 'self' 'unsafe-inline' {STATIC_HOST};
img-src 'self' data: {STATIC_HOST};
font-src 'self' {STATIC_HOST};
child-src 'self' blob: {STATIC_HOST};
frame-ancestors {frame_ancestors};
base-uri 'none';
object-src 'none';
form-action 'none';
"""
    headers = [
        ('Content-Type', 'text/html; charset=utf-8'),
        # NB: the newlines above make the CSP human readable here but we can't
        # send those over the wire.
        ('Content-Security-Policy', csp.replace('\n', ' ')),
        # XXXkhuey STS?
        ('X-Frame-Options', frame_options),
        ('Referrer-Policy', 'strict-origin-when-cross-origin'),
        ('Cache-Control', 'private,must-revalidate'),
    ]
    for header_name, header_value in headers:
        response.send_header(header_name, header_value)
def run_command() -> None:
    """Run the lobby server until interrupted with Ctrl-C.

    Steps, in order:
      1. Check that ``rr`` is on $PATH (exit status 1 otherwise).
      2. Start the reverse-proxy (traefik) container.
      3. Start an app-server container for every trace found in ``args.dir``.
      4. Watch ``args.dir`` and start/stop app servers as traces come and go.
      5. Serve the auth endpoint on port 5001 in a background thread.
      6. Serve the known HTML pages on port 5000 in the calling thread.

    On KeyboardInterrupt the auth server is shut down, the directory watcher
    is stopped, and all containers started here are stopped and removed.
    """
    global auth_server
    run_logger = logging.getLogger("run")

    if not shutil.which('rr'):
        print("Please install `rr` master and make sure it's on your $PATH.", file=sys.stderr)
        sys.exit(1)

    reverse_proxy_params = ["--log.level=DEBUG",
                            "--entryPoints.http.address=:%d"%args.port,
                            "--providers.file.directory=/traefik-config",
                            "--entryPoints.http.http.middlewares=auth@file",
                            "--providers.docker.exposedbydefault=false"]
    appserver_extra_arguments = []
    appserver_read_write_mounts = []
    if args.storage:
        base.call(['chmod', '--quiet', '--recursive', 'ugo+rwX', args.storage])
        appserver_extra_arguments.extend(["--app-storage", "/pernosco/storage"])
        appserver_read_write_mounts.append((args.storage, "/pernosco/storage"))
    else:
        run_logger.warning("No --storage argument specified, persistent data will be lost.")

    run_logger.debug("Starting reverse proxy")
    # Start traefik.
    reverse_proxy_id = start_container(REVERSE_PROXY_IMAGE, "reverse-proxy", reverse_proxy_params,
                                       network_mode_host=True,
                                       read_write_mounts=[('/var/run/docker.sock', '/var/run/docker.sock')])
    run_logger.debug("Reverse proxy up")

    dir_provider_logger = logging.getLogger("dir_provider")
    # Trace id -> container id, and trace directory -> trace id. Both are
    # also updated by the watchdog handler below.
    containers = {}
    traces_by_dir = {}

    with os.scandir(args.dir) as it:
        for entry in it:
            # Filter out things that aren't valid-looking Pernosco traces.
            # Toss anything that's not a directory.
            if entry.is_file():
                dir_provider_logger.debug("Ignoring non-directory %s"%entry.path)
                continue
            result = process_one_dir(entry.path, newly_added=False,
                                     extra_arguments=appserver_extra_arguments,
                                     read_write_mounts=appserver_read_write_mounts)
            if result is None:
                continue
            (uuid, container_id) = result
            containers[uuid] = container_id
            traces_by_dir[entry.path] = uuid

    run_logger.debug("Databases enumerated, starting watchdog")

    class DatabaseEventHandler(watchdog.events.FileSystemEventHandler):
        """Starts/stops app servers as trace directories appear/disappear."""

        def on_created(self, event):
            # NB: We can't actually check event.is_directory because
            # this could be a symlink.
            dir_provider_logger.debug("Observed file creation %s"%event.src_path)
            result = process_one_dir(event.src_path, newly_added=True,
                                     extra_arguments=appserver_extra_arguments,
                                     read_write_mounts=appserver_read_write_mounts)
            if result is None:
                return
            (uuid, container_id) = result
            containers[uuid] = container_id
            traces_by_dir[event.src_path] = uuid

        def on_deleted(self, event):
            # NB: We can't actually check event.is_directory because
            # this could be a symlink.
            dir_provider_logger.debug("Observed file deletion %s"%event.src_path)
            if event.src_path not in traces_by_dir:
                return
            uuid = traces_by_dir.pop(event.src_path)
            container_id = containers.pop(uuid)
            stop_container(container_id)
            cleanup_container(container_id)

    observer = watchdog.observers.Observer()
    observer.schedule(DatabaseEventHandler(), args.dir)
    observer.start()

    def serve_auth():
        """Thread body: serve the auth endpoint on port 5001 until shut down."""
        global auth_server

        class AuthHTTPRequestHandler(http.server.BaseHTTPRequestHandler):
            def do_GET(self):
                # XXXkhuey do real authentication here according to customer needs.
                self.send_response(200)
                self.send_header('X-Pernosco-Authorized', 'true')
                self.end_headers()

        server_address = ('', 5001)
        auth_server = http.server.HTTPServer(server_address, AuthHTTPRequestHandler)
        auth_server.serve_forever()

    run_logger.debug("Starting auth thread")
    auth_thread = threading.Thread(name="auth_thread", target=serve_auth)
    auth_thread.start()

    # Compiled once rather than on every request.
    url_parts_regex = re.compile("^/debug/(?P<id>[-_a-zA-Z0-9]{22})/(?P<suffix>.*)")
    # URL suffixes that map to a known HTML page.
    pages_by_suffix = {
        "index.html": KnownHtml.INDEX,
        "source-viewer.html": KnownHtml.SOURCE_VIEWER,
    }

    class LobbyHTTPRequestHandler(http.server.BaseHTTPRequestHandler):
        """Serves /debug/<trace id>/{index,source-viewer}.html; 404 otherwise."""

        def _send_not_found(self):
            self.send_response(404)
            self.end_headers()

        def do_GET(self):
            # The query string plays no part in routing.
            matches = url_parts_regex.match(self.path.split('?')[0])
            if not matches:
                self._send_not_found()
                return
            known_html = pages_by_suffix.get(matches.group("suffix"))
            if known_html is None:
                self._send_not_found()
                return
            response = get_known_html(known_html, matches.group("id"), containers)
            if response is None:
                self._send_not_found()
                return
            self.send_response(200)
            apply_known_html_headers(known_html, self)
            self.end_headers()
            self.wfile.write(response)

    try:
        server_address = ('', 5000)
        run_logger.debug("Starting main HTTP server")
        primary_server = http.server.ThreadingHTTPServer(server_address, LobbyHTTPRequestHandler)
        primary_server.serve_forever()
    except KeyboardInterrupt:
        assert auth_server is not None
        run_logger.debug("Interrupted, shutting down")
        auth_server.shutdown()
        auth_thread.join()

    # Stop the directory watcher before tearing containers down; otherwise
    # its handlers could add to or remove from `containers` while it is
    # being iterated, or start a container that is never cleaned up.
    observer.stop()
    observer.join()

    run_logger.debug("Shutting down app servers")
    for container in containers.values():
        stop_container(container)
        cleanup_container(container)
    run_logger.debug("Shutting down reverse proxy")
    stop_container(reverse_proxy_id)
    cleanup_container(reverse_proxy_id)
# Dispatch on the chosen subcommand; anything other than `run` (including
# no subcommand at all) just prints the usage text.
if args.subcommand != "run":
    arg_parser.print_help()
else:
    run_command()