Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added: shallow health check endpoint #78

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 139 additions & 0 deletions tails_server/health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import asyncio
import functools
import imp
import inspect
import json
import logging
import os
import socket
import sys
import time
import traceback

from aiohttp import web


# `reduce` lives in functools on every Python 3 release, and this module is
# Python 3 only, so import it unconditionally: a failure should surface here
# rather than as a NameError the first time a health check runs.
from functools import reduce


def basic_exception_handler(_, e):
    """Convert an error raised by a checker into a failed check outcome.

    The first argument (the checker that raised) is ignored.

    Returns:
        A ``(passed, output)`` pair where ``passed`` is always ``False`` and
        ``output`` is ``str(e)``.
    """
    failure_output = str(e)
    return (False, failure_output)


def json_success_handler(results):
    """Render the report for a passing health check as a JSON string.

    Args:
        results: JSON-serializable collection of per-checker results; it is
            embedded unchanged under the ``results`` key.

    Returns:
        A JSON document with the local hostname, the status ``success``, the
        current ``time.time()`` value and the given results, in that key order.
    """
    return json.dumps(dict(
        hostname=socket.gethostname(),
        status='success',
        timestamp=time.time(),
        results=results,
    ))


def json_failed_handler(results):
    """Render the report for a failing health check as a JSON string.

    Args:
        results: JSON-serializable collection of per-checker results; it is
            embedded unchanged under the ``results`` key.

    Returns:
        A JSON document with the local hostname, the status ``failure``, the
        current ``time.time()`` value and the given results, in that key order.
    """
    report = {}
    report['hostname'] = socket.gethostname()
    report['status'] = 'failure'
    report['timestamp'] = time.time()
    report['results'] = results
    return json.dumps(report)


def check_reduce(passed, result):
    """Fold one checker result into the running overall verdict.

    Intended as the function argument of ``reduce``: ``passed`` is the verdict
    accumulated so far and ``result`` is a result mapping with a ``passed`` key.

    Returns:
        ``passed`` itself when it is already falsy, otherwise
        ``result.get('passed')`` (``None`` when the key is missing).
    """
    if not passed:
        # An earlier failure sticks; later results cannot turn it back.
        return passed
    return result.get('passed')


class Check(object):
    """Callable request handler that runs health checkers and reports the outcome.

    An instance is registered as an aiohttp route handler. Each request runs
    every registered checker (or reuses its cached result while that result
    has not expired) and answers with the success or failure status, headers
    and body configured on the instance.

    A checker is a zero-argument callable, plain or coroutine, that produces a
    ``(passed, output)`` pair.
    """

    def __init__(self, success_status=200, success_headers=None,
                 success_handler=json_success_handler, success_ttl=None,
                 failed_status=500, failed_headers=None,
                 failed_handler=json_failed_handler, failed_ttl=None,
                 exception_handler=basic_exception_handler, checkers=None,
                 logger=None, **options):
        """Configure responses, cache lifetimes and the initial checkers.

        Args:
            success_status: HTTP status used when every checker passes.
            success_headers: Response headers for the passing case; defaults
                to a JSON content type.
            success_handler: Callable turning the result list into the body of
                a passing response. When falsy, the body is ``"OK"``.
            success_ttl: Seconds a passing result stays cached. ``None`` or 0
                gives the result no lifetime beyond the moment it was produced.
            failed_status: HTTP status used when any checker fails.
            failed_headers: Response headers for the failing case; defaults
                to a JSON content type.
            failed_handler: Callable turning the result list into the body of
                a failing response. When falsy, the body is ``"NOT OK"``.
            failed_ttl: Seconds a failing result stays cached (same rules as
                ``success_ttl``).
            exception_handler: Called as ``exception_handler(checker, exc)``
                when a checker raises; must return a ``(passed, output)`` pair.
            checkers: Optional iterable of checkers to register up front.
            logger: Logger to use; defaults to the ``HealthCheck`` logger.
            **options: Stored on ``self.options``; not otherwise used by this
                class.
        """
        # Maps a registered checker to its most recent result dict.
        self.cache = dict()

        self.success_status = success_status
        self.success_headers = success_headers or {'Content-Type': 'application/json'}
        self.success_handler = success_handler
        self.success_ttl = float(success_ttl or 0)

        self.failed_status = failed_status
        self.failed_headers = failed_headers or {'Content-Type': 'application/json'}
        self.failed_handler = failed_handler
        self.failed_ttl = float(failed_ttl or 0)

        self.exception_handler = exception_handler

        self.options = options

        self.logger = logger or logging.getLogger('HealthCheck')

        # Register through add_check so constructor-supplied checkers get the
        # same normalization as later ones, and so the caller's own list is
        # never mutated by add_check.
        self.checkers = []
        for checker in checkers or ():
            self.add_check(checker)

    async def __call__(self, request):
        """Handle a request by running the checks and building the response.

        ``request`` is accepted to satisfy the handler signature and is unused.
        """
        message, status, headers = await self.check()
        return web.Response(text=message, status=status, headers=headers)

    @staticmethod
    def _as_coroutine_function(func):
        """Wrap a plain callable so that calling it yields an awaitable."""
        @functools.wraps(func)
        async def checker():
            outcome = func()
            # Also covers callables that hand back an awaitable themselves.
            if inspect.isawaitable(outcome):
                outcome = await outcome
            return outcome

        return checker

    def add_check(self, func):
        """Register ``func`` as a checker, wrapping it if it is not a coroutine function."""
        if not inspect.iscoroutinefunction(func):
            func = self._as_coroutine_function(func)

        self.checkers.append(func)

    async def run_check(self, checker):
        """Run one checker and describe the outcome.

        If the checker raises, the exception is logged and handed to
        ``self.exception_handler``, whose ``(passed, output)`` pair is used
        instead. A failing outcome is logged at error level.

        Returns:
            A dict with the keys ``checker`` (its name), ``output``,
            ``passed``, ``timestamp`` and ``expires`` (timestamp plus the
            success or failure TTL).
        """
        # Not every callable carries __name__ (e.g. functools.partial objects).
        name = getattr(checker, '__name__', repr(checker))

        try:
            passed, output = await checker()
        except Exception as e:
            # logger.exception records the active traceback, so no separate
            # traceback print is needed.
            self.logger.exception('Health check "%s" raised an exception', name)
            # Hand over the exception instance, not its class, so the handler
            # can report the actual error message.
            passed, output = self.exception_handler(checker, e)

        if not passed:
            msg = 'Health check "{}" failed with output "{}"'.format(name, output)
            self.logger.error(msg)

        timestamp = time.time()
        ttl = self.success_ttl if passed else self.failed_ttl

        return {'checker': name,
                'output': output,
                'passed': passed,
                'timestamp': timestamp,
                'expires': timestamp + ttl}

    async def check(self):
        """Run or reuse every checker and pick the response to send.

        Returns:
            A ``(message, status, headers)`` tuple. The success variants are
            used when all results passed (including when no checker is
            registered), the failure variants otherwise.
        """
        results = []
        for checker in self.checkers:
            cached = self.cache.get(checker)
            if cached is not None and cached.get('expires') >= time.time():
                result = cached
            else:
                result = await self.run_check(checker)
                self.cache[checker] = result
            results.append(result)

        passed = reduce(check_reduce, results, True)

        if passed:
            message = "OK"
            if self.success_handler:
                message = self.success_handler(results)

            return message, self.success_status, self.success_headers

        message = "NOT OK"
        if self.failed_handler:
            message = self.failed_handler(results)

        return message, self.failed_status, self.failed_headers
11 changes: 11 additions & 0 deletions tails_server/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

routes = web.RouteTableDef()

from .health import Check, EnvDump

@routes.get("/match/{substring}")
async def match_files(request):
Expand Down Expand Up @@ -231,6 +232,8 @@ async def put_file_by_hash(request):

return web.Response(text=tails_hash)

def check_shallow():
    """Shallow health check that passes whenever the process can answer.

    Returns:
        A ``(passed, output)`` pair. The health ``Check`` unpacks a checker's
        result into exactly these two values; a bare ``True`` cannot be
        unpacked and would be reported as a failed check.
    """
    return True, "OK"

def start(settings):
app = web.Application()
Expand All @@ -239,6 +242,14 @@ def start(settings):
# Add routes
app.add_routes(routes)

# To avoid putting too much strain on backend services, health check results can be cached in process memory.
# The TTLs default to None, which effectively disables caching, so set them to specific intervals (in seconds) to enable it.
check = Check(success_ttl=30, failed_ttl=10)
app.router.add_get("/health/check", check)

# To enable extensibility, we can use `add_check` to inject our own custom check method (example above)
check.add_check(check_shallow)

web.run_app(
app,
host=settings.get("host") or DEFAULT_WEB_HOST,
Expand Down