From 828710a2a7ea100db8261545f9eb4e0c8a9dd5ce Mon Sep 17 00:00:00 2001 From: Alex Kotlar Date: Sat, 2 Feb 2019 23:45:42 -0500 Subject: [PATCH 1/6] ujson, removed unused imports --- scorecard/environment.yml | 1 + scorecard/scorecard/scorecard.py | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/scorecard/environment.yml b/scorecard/environment.yml index 8e5e8105d85..1ea5654e5d8 100644 --- a/scorecard/environment.yml +++ b/scorecard/environment.yml @@ -5,5 +5,6 @@ dependencies: - flask-cors - humanize - pip +- ujson - pip: - PyGithub diff --git a/scorecard/scorecard/scorecard.py b/scorecard/scorecard/scorecard.py index 5380f02d816..fd3df94441d 100644 --- a/scorecard/scorecard/scorecard.py +++ b/scorecard/scorecard/scorecard.py @@ -3,13 +3,14 @@ import datetime import os import sys -from flask import Flask, render_template, request, jsonify, abort, url_for +from flask import Flask, render_template from flask_cors import CORS from github import Github import random import threading import humanize import logging +import ujson fmt = logging.Formatter( # NB: no space after levename because WARNING is so long @@ -82,16 +83,16 @@ def json_all_users(): for issue in urgent_issues: issue['timedelta'] = humanize.naturaltime(issue['timedelta']) - return jsonify(updated=updated, user_data=user_data, unassigned=unassigned, urgent_issues=urgent_issues) + return ujson.dumps({"updated": updated, "user_data": user_data, "unassigned": unassigned, "urgent_issues":urgent_issues}), 200 @app.route('/json/users/') def json_user(user): user_data, updated = get_user(user) - return jsonify(updated=updated, data=user_data) + return ujson.dumps({"updated": updated, "user_data": user_data}), 200 @app.route('/json/random') def json_random_user(): - return jsonify(random.choice(users)) + return random.choice(users), 200 def get_users(): cur_data = data From a370c0ac5f15f7482d8cd39d23f883d663c9725a Mon Sep 17 00:00:00 2001 From: Alex Kotlar Date: Sun, 3 Feb 2019 00:42:14 -0500 Subject: [PATCH 2/6] sanic version of scorecard, aka asyncio via uvloop. should be quite a bit faster than flask, but this is mostly a dry run testing ease of conversion from flask. templates have been modified to avoid favicon.ico 404, and formatting into 80 line width for user.html --- scorecard/environment.yml | 6 +- scorecard/scorecard/scorecard.py | 52 +++++++++---- scorecard/scorecard/templates/index.html | 1 + scorecard/scorecard/templates/user.html | 98 +++++++++++++----------- 4 files changed, 95 insertions(+), 62 deletions(-) diff --git a/scorecard/environment.yml b/scorecard/environment.yml index 1ea5654e5d8..accaebce9ca 100644 --- a/scorecard/environment.yml +++ b/scorecard/environment.yml @@ -1,10 +1,12 @@ name: scorecard dependencies: - python=3.7 -- flask -- flask-cors - humanize - pip - ujson +- jinja2 - pip: - PyGithub + - sanic + - sanic-cors + - uvloop diff --git a/scorecard/scorecard/scorecard.py b/scorecard/scorecard/scorecard.py index fd3df94441d..60083b7d1dc 100644 --- a/scorecard/scorecard/scorecard.py +++ b/scorecard/scorecard/scorecard.py @@ -3,14 +3,22 @@ import datetime import os import sys -from flask import Flask, render_template -from flask_cors import CORS from github import Github import random import threading import humanize import logging import ujson +import uvloop +from sanic import Sanic +from sanic.response import text,json,html +from sanic_cors import CORS +from jinja2 import Environment, PackageLoader, select_autoescape + +env = Environment(loader=PackageLoader('scorecard', 'templates/'), autoescape=select_autoescape(['html', 'xml', 'tpl']), enable_async=True) + +users_template = env.get_template('index.html') +one_user_templ = env.get_template('user.html') fmt = logging.Formatter( # NB: no space after levename because WARNING is so long @@ -56,44 +64,50 @@ 'cloudtools': 'Nealelab/cloudtools' } -app = Flask('scorecard') +app = Sanic(__name__) CORS(app, resources={r'/json/*': {'origins': '*'}}) data = None +users_data = None +rendered_users_template = None timsetamp = None @app.route('/') -def index(): - user_data, unassigned, urgent_issues, updated = get_users() +async def index(request): + user_data, unassigned, urgent_issues, updated = users_data random_user = random.choice(users) - return render_template('index.html', unassigned=unassigned, + tmpl = await users_template.render_async( unassigned=unassigned, user_data=user_data, urgent_issues=urgent_issues, random_user=random_user, updated=updated) + return html(tmpl) @app.route('/users/') -def html_get_user(user): +async def html_get_user(request, user): user_data, updated = get_user(user) - return render_template('user.html', user=user, user_data=user_data, updated=updated) + + tmpl = await one_user_templ.render_async(user=user, user_data=user_data, updated=updated) + return html(tmpl) @app.route('/json') -def json_all_users(): - user_data, unassigned, urgent_issues, updated = get_users() +async def json_all_users(request): + user_data, unassigned, urgent_issues, updated = users_data for issue in urgent_issues: issue['timedelta'] = humanize.naturaltime(issue['timedelta']) - return ujson.dumps({"updated": updated, "user_data": user_data, "unassigned": unassigned, "urgent_issues":urgent_issues}), 200 + return json({"updated": updated, "user_data": user_data, "unassigned": unassigned, "urgent_issues":urgent_issues}) @app.route('/json/users/') -def json_user(user): +async def json_user(request,user): user_data, updated = get_user(user) - return ujson.dumps({"updated": updated, "user_data": user_data}), 200 + return json({"updated": updated, "user_data": user_data}) @app.route('/json/random') -def json_random_user(): - return random.choice(users), 200 +async def json_random_user(request): + return text(random.choice(users)) +# Lets cache this; we now call this only during update def get_users(): cur_data = data cur_timestamp = timestamp @@ -237,7 +251,7 @@ def get_issue_data(repo_name, issue): } def update_data(): - global data, timestamp + global data, timestamp, users_data, users_template log.info(f'rate_limit {github.get_rate_limit()}') log.info('start updating_data') @@ -270,6 +284,10 @@ def update_data(): data = new_data timestamp = now + # Takes reference, so don't mutate data + # user_data, unassigned, urgent_issues, updated = users_data + users_data = get_users() + def poll(): while True: time.sleep(180) @@ -302,4 +320,4 @@ def run_forever(target, *args, **kwargs): poll_thread.start() if __name__ == '__main__': - app.run(host='0.0.0.0') + app.run(host='0.0.0.0', port=5000) diff --git a/scorecard/scorecard/templates/index.html b/scorecard/scorecard/templates/index.html index 9e7a09ade52..4e3ee76730d 100644 --- a/scorecard/scorecard/templates/index.html +++ b/scorecard/scorecard/templates/index.html @@ -1,6 +1,7 @@ Scorecard + - - -
-
-

-
+ + +
+
+

+
-
+

Welcome to Scorecard, {{ user }}!

Needs Review

{% if user_data['NEEDS_REVIEW'] %} - + {% for pr in user_data['NEEDS_REVIEW'] %} - - - + + + {% endfor %} - +
{{ pr.id }}{{ pr.user }}{{ pr.title }} + {{ pr.id }} + + {{ pr.user }} + {{ pr.title }}
{% else %}

No reviews needed.

@@ -51,14 +59,16 @@

Needs Review

Changes Requested

{% if user_data['CHANGES_REQUESTED'] %} - + {% for pr in user_data['CHANGES_REQUESTED'] %} - - + + {% endfor %} - +
{{ pr.id }}{{ pr.title }} + {{ pr.id }} + {{ pr.title }}
{% else %}

No changes requested.

@@ -67,41 +77,43 @@

Changes Requested

Failing tests

{% if user_data['FAILING'] %} - + {% for pr in user_data['FAILING'] %} - - + + {% endfor %} - +
{{ pr.id }}{{ pr.title }} + {{ pr.id }} + {{ pr.title }}
{% else %}

No failing builds.

{% endif %} -

Issues

{% if user_data['ISSUES'] %} - + {% for issue in user_data['ISSUES'] %} - - + + {% endfor %} - +
{{ issue.id }}{{ issue.title }} + {{ issue.id }} + {{ issue.title }}
{% else %}

No issues.

{% endif %}

- last updated {{ updated }} + last updated {{ updated }}

+
-
- - + From ca0d45c006955446897fc90262b2342a60718c7b Mon Sep 17 00:00:00 2001 From: Alex Kotlar Date: Sun, 3 Feb 2019 00:46:39 -0500 Subject: [PATCH 3/6] apparently sanic defaults to inclusion of ujson and uvloop, even if they aren't imported. I would rather they make those imports an explicit requirement... --- scorecard/environment.yml | 2 -- scorecard/scorecard/scorecard.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/scorecard/environment.yml b/scorecard/environment.yml index accaebce9ca..7f96cec9cf7 100644 --- a/scorecard/environment.yml +++ b/scorecard/environment.yml @@ -3,10 +3,8 @@ dependencies: - python=3.7 - humanize - pip -- ujson - jinja2 - pip: - PyGithub - sanic - sanic-cors - - uvloop diff --git a/scorecard/scorecard/scorecard.py b/scorecard/scorecard/scorecard.py index 60083b7d1dc..f9888d4d92b 100644 --- a/scorecard/scorecard/scorecard.py +++ b/scorecard/scorecard/scorecard.py @@ -8,8 +8,6 @@ import threading import humanize import logging -import ujson -import uvloop from sanic import Sanic from sanic.response import text,json,html from sanic_cors import CORS From 79d5c656bee9f6729d430d6b32f7c1c6bba65086 Mon Sep 17 00:00:00 2001 From: Alex Kotlar Date: Sun, 3 Feb 2019 01:15:17 -0500 Subject: [PATCH 4/6] dockerfile needs gcc to build sanic --- scorecard/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/scorecard/Dockerfile b/scorecard/Dockerfile index 4f23f5315ea..d628777624e 100644 --- a/scorecard/Dockerfile +++ b/scorecard/Dockerfile @@ -2,6 +2,7 @@ FROM continuumio/miniconda MAINTAINER Hail Team COPY environment.yml . +RUN apt-get update && apt-get install -y linux-headers-amd64 build-essential RUN conda env create scorecard -f environment.yml && \ rm -f environment.yml && \ rm -rf /home/root/.conda/pkgs/* From 75078f43f6fb4e412d8aed93db7236ba49bce5f7 Mon Sep 17 00:00:00 2001 From: Alex Kotlar Date: Sun, 3 Feb 2019 01:21:56 -0500 Subject: [PATCH 5/6] cache all-users json at generation time --- scorecard/scorecard/scorecard.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scorecard/scorecard/scorecard.py b/scorecard/scorecard/scorecard.py index f9888d4d92b..f17d15e8471 100644 --- a/scorecard/scorecard/scorecard.py +++ b/scorecard/scorecard/scorecard.py @@ -8,6 +8,7 @@ import threading import humanize import logging +import ujson # included by Sanic from sanic import Sanic from sanic.response import text,json,html from sanic_cors import CORS @@ -67,6 +68,7 @@ data = None users_data = None +users_json = None rendered_users_template = None timsetamp = None @@ -94,7 +96,7 @@ async def json_all_users(request): for issue in urgent_issues: issue['timedelta'] = humanize.naturaltime(issue['timedelta']) - return json({"updated": updated, "user_data": user_data, "unassigned": unassigned, "urgent_issues":urgent_issues}) + return text(users_json) @app.route('/json/users/') async def json_user(request,user): @@ -249,7 +251,7 @@ def get_issue_data(repo_name, issue): } def update_data(): - global data, timestamp, users_data, users_template + global data, timestamp, users_data, users_json log.info(f'rate_limit {github.get_rate_limit()}') log.info('start updating_data') @@ -285,6 +287,7 @@ def update_data(): # Takes reference, so don't mutate data # user_data, unassigned, urgent_issues, updated = users_data users_data = get_users() + users_json = ujson.dumps({"user_data": users_data[0], "unassigned": users_data[1], "urgent_issues": users_data[2], "updated": users_data[3]}) def poll(): while True: From 90b1e74917ec4ad612aa528374a8ef3efd09ad00 Mon Sep 17 00:00:00 2001 From: Alex Kotlar Date: Wed, 6 Feb 2019 13:30:31 -0500 Subject: [PATCH 6/6] fixes bug from the original flask version in which startup github query would run twice. don't cache timestamp diff, just timestamp; remove unnecessary globals use from flask v. add comments clarifying race conditions, shared between flask and current version. improve favicon.ico handling for chrome --- scorecard/environment.yml | 1 + scorecard/scorecard/scorecard.py | 121 +++++++++++++---------- scorecard/scorecard/static/favicon.ico | 0 scorecard/scorecard/templates/index.html | 2 +- scorecard/scorecard/templates/user.html | 2 +- 5 files changed, 72 insertions(+), 54 deletions(-) create mode 100644 scorecard/scorecard/static/favicon.ico diff --git a/scorecard/environment.yml b/scorecard/environment.yml index 7f96cec9cf7..d7a30f031bb 100644 --- a/scorecard/environment.yml +++ b/scorecard/environment.yml @@ -8,3 +8,4 @@ dependencies: - PyGithub - sanic - sanic-cors + - ujson diff --git a/scorecard/scorecard/scorecard.py b/scorecard/scorecard/scorecard.py index f17d15e8471..80ddaec7328 100644 --- a/scorecard/scorecard/scorecard.py +++ b/scorecard/scorecard/scorecard.py @@ -8,13 +8,14 @@ import threading import humanize import logging -import ujson # included by Sanic from sanic import Sanic -from sanic.response import text,json,html +from sanic.response import text, json, html from sanic_cors import CORS from jinja2 import Environment, PackageLoader, select_autoescape +import ujson -env = Environment(loader=PackageLoader('scorecard', 'templates/'), autoescape=select_autoescape(['html', 'xml', 'tpl']), enable_async=True) +env = Environment(loader=PackageLoader('scorecard', 'templates/'), + autoescape=select_autoescape(['html', 'xml', 'tpl']), enable_async=True) users_template = env.get_template('index.html') one_user_templ = env.get_template('user.html') @@ -66,54 +67,63 @@ app = Sanic(__name__) CORS(app, resources={r'/json/*': {'origins': '*'}}) -data = None -users_data = None -users_json = None -rendered_users_template = None -timsetamp = None +fav_path = os.path.join(os.path.dirname(__file__), 'static', 'favicon.ico') +app.static('/favicon.ico', fav_path) + +########### Global variables that are modified in a separate thread ############ +# Must be only read, never written in parent thread, else need to use Lock() +# http://effbot.org/zone/thread-synchronization.htm#synchronizing-access-to-shared-resources +data=None +users_data=None +users_json=None +timsetamp=None +################################################################################ + @app.route('/') async def index(request): - user_data, unassigned, urgent_issues, updated = users_data + user_data, unassigned, urgent_issues=users_data + + # Read timestamp as quickly as possible in case timestamp gets modified + # by forever_poll thread + cur_timestamp=timestamp + updated=humanize.naturaltime( + datetime.datetime.now() - datetime.timedelta(seconds=time.time() - cur_timestamp)) - random_user = random.choice(users) + random_user=random.choice(users) - tmpl = await users_template.render_async( unassigned=unassigned, - user_data=user_data, urgent_issues=urgent_issues, random_user=random_user, updated=updated) + tmpl=await users_template.render_async(unassigned = unassigned, + user_data = user_data, urgent_issues = urgent_issues, random_user = random_user, updated = updated) return html(tmpl) + @app.route('/users/') async def html_get_user(request, user): - user_data, updated = get_user(user) + user_data, updated=get_user(user) - tmpl = await one_user_templ.render_async(user=user, user_data=user_data, updated=updated) + tmpl=await one_user_templ.render_async(user = user, user_data = user_data, updated = updated) return html(tmpl) + @app.route('/json') async def json_all_users(request): - user_data, unassigned, urgent_issues, updated = users_data - - for issue in urgent_issues: - issue['timedelta'] = humanize.naturaltime(issue['timedelta']) - return text(users_json) + @app.route('/json/users/') -async def json_user(request,user): - user_data, updated = get_user(user) +async def json_user(request, user): + user_data, updated=get_user(user) return json({"updated": updated, "user_data": user_data}) + @app.route('/json/random') async def json_random_user(request): return text(random.choice(users)) -# Lets cache this; we now call this only during update -def get_users(): - cur_data = data - cur_timestamp = timestamp - unassigned = [] - user_data = collections.defaultdict( +def get_and_cache_users(github_data): + unassigned=[] + user_data=collections.defaultdict( lambda: {'CHANGES_REQUESTED': [], 'NEEDS_REVIEW': [], 'ISSUES': []}) @@ -146,7 +156,7 @@ def add_issue(repo_name, issue): else: d['ISSUES'].append(issue) - for repo_name, repo_data in cur_data.items(): + for repo_name, repo_data in github_data.items(): for pr in repo_data['prs']: if len(pr['assignees']) == 0: unassigned.append(pr) @@ -157,20 +167,22 @@ def add_issue(repo_name, issue): for issue in repo_data['issues']: add_issue(repo_name, issue) - list.sort(urgent_issues, key=lambda issue: issue['timedelta'], reverse=True) + list.sort(urgent_issues, + key = lambda issue: issue['timedelta'], reverse=True) - updated = humanize.naturaltime( - datetime.datetime.now() - datetime.timedelta(seconds = time.time() - cur_timestamp)) + return (user_data, unassigned, urgent_issues) - return (user_data, unassigned, urgent_issues, updated) def get_user(user): - global data, timestamp + global data cur_data = data cur_timestamp = timestamp - user_data = { + updated = humanize.naturaltime( + datetime.datetime.now() - datetime.timedelta(seconds=time.time() - cur_timestamp)) + + user_data={ 'CHANGES_REQUESTED': [], 'NEEDS_REVIEW': [], 'FAILING': [], @@ -196,8 +208,6 @@ def get_user(user): if user in issue['assignees']: user_data['ISSUES'].append(issue) - updated = humanize.naturaltime( - datetime.datetime.now() - datetime.timedelta(seconds=time.time() - cur_timestamp)) return (user_data, updated) @@ -207,6 +217,7 @@ def get_id(repo_name, number): else: return f'{repo_name}/{number}' + def get_pr_data(repo, repo_name, pr): assignees = [a.login for a in pr.assignees] @@ -222,7 +233,8 @@ def get_pr_data(repo, repo_name, pr): break else: if review.state != 'COMMENTED': - log.warning(f'unknown review state {review.state} on review {review} in pr {pr}') + log.warning( + f'unknown review state {review.state} on review {review} in pr {pr}') sha = pr.head.sha status = repo.get_commit(sha=sha).get_combined_status().state @@ -238,6 +250,7 @@ def get_pr_data(repo, repo_name, pr): 'status': status } + def get_issue_data(repo_name, issue): assignees = [a.login for a in issue.assignees] return { @@ -250,6 +263,7 @@ def get_issue_data(repo_name, issue): 'created_at': issue.created_at } + def update_data(): global data, timestamp, users_data, users_json @@ -276,31 +290,25 @@ def update_data(): issue_data = get_issue_data(repo_name, issue) new_data[repo_name]['issues'].append(issue_data) - log.info('updating_data done') - now = time.time() - data = new_data - timestamp = now + timestamp = time.time() + users_data = get_and_cache_users(new_data) + users_json = ujson.dumps( + {"user_data": users_data[0], "unassigned": users_data[1], "urgent_issues": users_data[2], "timestamp": timestamp}) - # Takes reference, so don't mutate data - # user_data, unassigned, urgent_issues, updated = users_data - users_data = get_users() - users_json = ujson.dumps({"user_data": users_data[0], "unassigned": users_data[1], "urgent_issues": users_data[2], "updated": users_data[3]}) def poll(): while True: time.sleep(180) update_data() -update_data() def run_forever(target, *args, **kwargs): - # target should be a function target_name = target.__name__ + expected_retry_interval_ms = 15 * 1000 # 15s - expected_retry_interval_ms = 15 * 1000 # 15s while True: start = time.time() try: @@ -308,17 +316,26 @@ def run_forever(target, *args, **kwargs): target(*args, **kwargs) log.info(f'target {target_name} returned') except: - log.error(f'target {target_name} threw exception', exc_info=sys.exc_info()) + log.error(f'target {target_name} threw exception', + exc_info=sys.exc_info()) end = time.time() run_time_ms = int((end - start) * 1000 + 0.5) + t = random.randrange(expected_retry_interval_ms * 2) - run_time_ms if t > 0: log.debug(f'{target_name}: sleep {t}ms') time.sleep(t / 1000.0) -poll_thread = threading.Thread(target=run_forever, args=(poll,), daemon=True) -poll_thread.start() if __name__ == '__main__': - app.run(host='0.0.0.0', port=5000) + # Any code that is run before main gets executed twice, run here + + update_data() + + poll_thread = threading.Thread( + target=run_forever, args=(poll,), daemon=True) + + poll_thread.start() + + app.run(host='0.0.0.0', port=5000, debug=False) diff --git a/scorecard/scorecard/static/favicon.ico b/scorecard/scorecard/static/favicon.ico new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scorecard/scorecard/templates/index.html b/scorecard/scorecard/templates/index.html index 4e3ee76730d..9200fab81d3 100644 --- a/scorecard/scorecard/templates/index.html +++ b/scorecard/scorecard/templates/index.html @@ -1,7 +1,7 @@ Scorecard - +