Skip to content
This repository was archived by the owner on Nov 4, 2024. It is now read-only.

feat: api v2 for mdn observatory #522

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions httpobs/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
periodic_maintenance,
refresh_materialized_views,
select_scan_host_history,
select_scan_most_recent_scan,
select_scan_recent_finished_scans,
select_scan_recent_scan,
select_scan_scanner_statistics,
Expand All @@ -23,6 +24,7 @@
'select_scan_host_history',
'select_scan_recent_finished_scans',
'select_scan_recent_scan',
'select_scan_most_recent_scan',
'select_scan_scanner_statistics',
'select_site_headers',
'select_site_id',
Expand Down
46 changes: 37 additions & 9 deletions httpobs/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def insert_scan(site_id: int, hidden: bool = False) -> dict:
def insert_test_results(site_id: int, scan_id: int, data: dict) -> dict:
with get_cursor() as cur:
for name, test in data["tests"].items():
test = test.copy() # don't mutate argument
expectation = test.pop('expectation')
passed = test.pop('pass')
result = test.pop('result')
Expand Down Expand Up @@ -327,6 +328,20 @@ def select_scan_recent_scan(site_id: int, recent_in_seconds=API_CACHED_RESULT_TI
return {}


def select_scan_most_recent_scan(site_id: int) -> dict | None:
    """Fetch the newest finished scan recorded for a site.

    Only rows of ``scans`` that belong to ``site_id`` and have a non-NULL
    ``end_time`` are considered; the one with the latest ``start_time`` wins.

    Returns the matching row converted to a plain ``dict``, or ``None`` when
    the query matched nothing.
    """
    # SQL text is kept verbatim; only its placement in the function changed.
    query = """SELECT * FROM scans
WHERE site_id = %s AND end_time IS NOT NULL
ORDER BY start_time DESC
LIMIT 1"""
    params = (site_id,)

    with get_cursor() as cur:
        cur.execute(query, params)

        # Guard clause: nothing matched, so there is no row to fetch.
        if cur.rowcount <= 0:
            return None

        return dict(cur.fetchone())


def select_site_headers(hostname: str) -> dict:
# Return the site's headers
with get_cursor() as cur:
Expand All @@ -351,7 +366,7 @@ def select_site_headers(hostname: str) -> dict:
return {}


def select_site_id(hostname: str) -> int:
def select_site_id(hostname: str, create=True) -> int | None:
# See if the site exists already
with get_cursor() as cur:
cur.execute(
Expand All @@ -366,15 +381,16 @@ def select_site_id(hostname: str) -> int:
return cur.fetchone()['id']

# If not, let's create the site
with get_cursor() as cur:
cur.execute(
"""INSERT INTO sites (domain, creation_time)
VALUES (%s, NOW())
RETURNING id""",
(hostname,),
)
if create:
with get_cursor() as cur:
cur.execute(
"""INSERT INTO sites (domain, creation_time)
VALUES (%s, NOW())
RETURNING id""",
(hostname,),
)

return cur.fetchone()['id']
return cur.fetchone()['id']


def select_test_results(scan_id: int) -> dict:
Expand Down Expand Up @@ -404,6 +420,18 @@ def update_scan_state(scan_id, state: str, error=None) -> dict:

row = dict(cur.fetchone())

elif state == STATE_FAILED:
with get_cursor() as cur:
cur.execute(
"""UPDATE scans
SET (state, end_time) = (%s, NOW())
WHERE id = %s
RETURNING *""",
(state, scan_id),
)

row = dict(cur.fetchone())

else:
with get_cursor() as cur:
cur.execute(
Expand Down
6 changes: 6 additions & 0 deletions httpobs/website/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
# TODO: Implement API to write public and private headers to the database


@api.route('/')
@add_response_headers()
def main() -> str:
    """Serve the plain-text greeting shown at the site root."""
    greeting = 'Welcome to the HTTP Observatory!'
    return greeting


@api.route('/api/v1/analyze', methods=['GET', 'OPTIONS', 'POST'])
@add_response_headers(cors=True)
@sanitized_api_response
Expand Down
137 changes: 137 additions & 0 deletions httpobs/website/api_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import sys
from datetime import datetime, timedelta

from flask import Blueprint, jsonify, request

import httpobs.database as database
import httpobs.scanner as scanner
from httpobs import STATE_FAILED
from httpobs.conf import API_COOLDOWN, DEVELOPMENT_MODE
from httpobs.scanner.grader import get_score_description
from httpobs.website import add_response_headers
from httpobs.website.utils import valid_hostname

api_v2 = Blueprint("api_v2", __name__)


@api_v2.route("/analyze", methods=["GET", "OPTIONS", "POST"])
@add_response_headers(cors=True)
def api_post_scan_hostname():
    """Return scan results for the ``host`` query parameter, scanning if needed.

    Flow:
      1. Look the host up in the database without creating it. Hosts that are
         already known skip hostname validation.
      2. Unknown hosts are validated (retrying with a ``www.`` prefix) and
         only then inserted as a site.
      3. The most recent finished scan is reused, except when the request is
         a POST made after ``API_COOLDOWN`` seconds have elapsed, in which
         case a fresh scan is run. A POST inside the cooldown window gets the
         cached scan back together with HTTP 429.
      4. With no reusable scan, a scan is run synchronously and its outcome
         (results or failure) is stored.

    Returns:
        A JSON body with ``scan``, ``tests`` and ``history`` plus the status
        code (200 or 429), or an ``{"error", "text"}`` dict with 400/503 when
        the host is invalid or the database lookup raises ``IOError``.
    """
    status_code = 200
    scan = {}
    tests = {}

    host = request.args.get("host", "").lower().strip()
    try:
        site_id = database.select_site_id(host, create=False)
    except IOError:
        return {
            "error": "database-down",
            "text": "Unable to connect to database",
        }, 503

    if site_id is not None:
        hostname = host
    else:
        # Validate once and reuse the answer; the original called
        # valid_hostname(host) twice for the same input.
        validated = valid_hostname(host)

        # A None result is what this handler treats as "host is an IP address".
        if validated is None:
            return {
                "error": "invalid-hostname-ip",
                "text": "Cannot scan IP addresses",
            }, 400

        hostname = validated or (
            valid_hostname("www." + host) if host else False
        )  # prepend www. if necessary
        if not hostname:
            return {
                "error": "invalid-hostname",
                "text": f"{host} is an invalid hostname",
            }, 400

        # The site is stored under the host exactly as requested, so the
        # create=False lookup above finds it on the next request.
        site_id = database.select_site_id(host, create=True)

    scan = database.select_scan_most_recent_scan(site_id)

    if scan and request.method == "POST":
        time_since_scan = datetime.now() - scan["end_time"]
        if time_since_scan < timedelta(seconds=API_COOLDOWN):
            status_code = 429  # keep going, we'll respond with the most recent scan
        else:
            scan = None  # clear the scan, and we'll do another

    if scan:
        scan_id = scan["id"]

        tests = database.select_test_results(scan_id)
        for name, test in tests.items():
            # Strip database bookkeeping columns from the public response.
            del test["id"]
            del test["scan_id"]
            del test["site_id"]
            del test["name"]
            test["score_description"] = get_score_description(test["result"])
            # Flatten the nested "output" mapping into the test itself;
            # top-level keys win on collision.
            tests[name] = {**test.pop("output"), **test}

    else:
        # no scan means we're a POST which hasn't been rate limited
        # or we're a GET for a host which has no scans in the db
        # either way, we need to perform a scan

        hidden = request.form.get("hidden", "false") == "true"

        scan = database.insert_scan(site_id, hidden=hidden)
        scan_id = scan["id"]

        # Get the site's cookies and headers
        # TODO: add API to insert these into the db
        # headers = database.select_site_headers(hostname)

        try:
            result = scanner.scan(hostname)

            if "error" in result:
                scan = database.update_scan_state(scan_id, STATE_FAILED, error=result["error"])
            else:
                scan = database.insert_test_results(
                    site_id,
                    scan_id,
                    result,
                )
                tests = result["tests"]
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # SystemExit / KeyboardInterrupt still propagate.
            # If we are unsuccessful, close out the scan in the database
            scan = database.update_scan_state(scan_id, STATE_FAILED)

            # Print the exception to stderr if we're in dev
            if DEVELOPMENT_MODE:
                import traceback

                print("Error detected in scan for: " + hostname)
                traceback.print_exc(file=sys.stderr)

    scan["start_time"] = scan["start_time"].isoformat()
    scan["end_time"] = scan["end_time"].isoformat()

    full_history = database.select_scan_host_history(site_id)

    # Prune entries whose score equals the previous entry's score; the first
    # entry is always kept. Scores are compared by value (!=), not identity.
    history = [
        {
            "end_time": entry["end_time"].isoformat(),
            "grade": entry["grade"],
            "id": entry["scan_id"],
            "score": entry["score"],
        }
        for index, entry in enumerate(full_history)
        if index == 0 or entry.get('score') != full_history[index - 1].get('score')
    ]

    return (
        jsonify(
            {
                "scan": scan,
                "tests": tests,
                "history": history,
            }
        ),
        status_code,
    )
34 changes: 16 additions & 18 deletions httpobs/website/main.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,32 @@
import sys

from flask import Flask

from httpobs.conf import API_PORT, API_PROPAGATE_EXCEPTIONS, DEVELOPMENT_MODE
from httpobs.website import add_response_headers
from httpobs.website.api import api
from httpobs.website.monitoring import monitoring_api


def __exit_with(msg: str) -> None:
print(msg)
sys.exit(1)

def create_app():
# Register the application with flask
app = Flask('http-observatory')
app.config['PROPAGATE_EXCEPTIONS'] = API_PROPAGATE_EXCEPTIONS

# Register the application with flask
app = Flask('http-observatory')
app.config['PROPAGATE_EXCEPTIONS'] = API_PROPAGATE_EXCEPTIONS
app.register_blueprint(api)
app.register_blueprint(monitoring_api)
from httpobs.website.api import api
from httpobs.website.api_v2 import api_v2
from httpobs.website.monitoring import monitoring_api

app.register_blueprint(api)
app.register_blueprint(api_v2, url_prefix="/api/v2")
app.register_blueprint(monitoring_api)

@app.route('/')
@add_response_headers()
def main() -> str:
return 'Welcome to the HTTP Observatory!'
return app


def run():
app = create_app()
app.run(debug=DEVELOPMENT_MODE, port=API_PORT)


if __name__ == '__main__':
run()

# make backwards compatible with uwsgi setup
# TODO: move into wsgi.py
app = create_app()