From d9a9750721af85f8315155919b6161162e84ea9c Mon Sep 17 00:00:00 2001 From: Michel Van den Bergh Date: Wed, 1 May 2024 01:45:48 +0000 Subject: [PATCH] Don't save the residuals and their colors in the db. The main reason for this PR is that currently the residuals are updated in tests_view. This is wrong. tests_view is a GET request. It is not allowed to change the server state. So if we want updated residuals then we need to update them during POST requests, such as update_task and failed_task. However this may be CPU intensive if a fleet of workers disconnects. We could throttle the updates but this requires some extra state that needs to be maintained (the time of the last update). So the alternative is to compute the residuals on the fly when they are displayed. This is what this PR does. This solves for example the issue #1948. It should now again be possible to serve the tasks from a secondary instance. Computing residuals on the fly is not possible for bad tasks. So for bad tasks we record the residual and its color from the time they became bad. However the color is saved symbolically as green, yellow, red, instead of as an actual hex-code. --- server/fishtest/api.py | 2 +- server/fishtest/rundb.py | 27 +++++++++------ server/fishtest/schemas.py | 9 +++-- server/fishtest/templates/tasks.mak | 11 +++--- server/fishtest/util.py | 54 ++++++++++++++++++----------- server/fishtest/views.py | 4 +-- 6 files changed, 65 insertions(+), 42 deletions(-) diff --git a/server/fishtest/api.py b/server/fishtest/api.py index 7179158d2..f06a95061 100644 --- a/server/fishtest/api.py +++ b/server/fishtest/api.py @@ -436,7 +436,7 @@ def get_task(self): task["last_updated"] = str(task["last_updated"]) if "residual" in task: # json does not know about infinity - if task["residual"] == float("inf"): + if task.get("residual", None) == float("inf"): task["residual"] = "inf" return task diff --git a/server/fishtest/rundb.py b/server/fishtest/rundb.py index fda483bb2..c723a9e9b 100644 --- a/server/fishtest/rundb.py +++ b/server/fishtest/rundb.py @@ -46,7 +46,7 @@ get_hash, get_tc_ratio, remaining_hours, - update_residuals, + residual_to_color, worker_name, ) from fishtest.workerdb import WorkerDb @@ -554,7 +554,8 @@ def new_run( "tasks": [], "approved": False, "approver": "", - # Aggregated data + "bad_tasks": [], + # Aggregated results "results": { "wins": 0, "losses": 0, @@ -1630,7 +1631,11 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1): if run.get("failed", False): return "You cannot purge a failed run" message = "No bad workers" - # Transfer bad tasks to run["bad_tasks"] + + # The following is necessary to be able to purge + # old runs. It can be deleted after 30 days + # (cfr above). + if "bad_tasks" not in run: run["bad_tasks"] = [] @@ -1642,14 +1647,10 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1): # Special cases: crashes or time losses. if crash_or_time(task): message = "" - # The residual or residual color my not have been set yet - self.set_bad_task(task_id, run, residual=10.0, residual_color="#FF6A6A") + # The residual or residual color may not have been set yet + self.set_bad_task(task_id, run, residual=10.0, residual_color="red") chi2 = get_chi2(run["tasks"]) - # Make sure the residuals are up to date. - # Once a task is moved to run["bad_tasks"] its - # residual will no longer change. - update_residuals(run["tasks"], cached_chi2=chi2) bad_workers = get_bad_workers( run["tasks"], cached_chi2=chi2, @@ -1663,7 +1664,13 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1): continue if task["worker_info"]["unique_key"] in bad_workers: message = "" - self.set_bad_task(task_id, run) + residual = chi2["residual"].get( + task["worker_info"]["unique_key"], float("inf") + ) + residual_color = residual_to_color(residual, chi2) + self.set_bad_task( + task_id, run, residual=residual, residual_color=residual_color + ) if message == "": results = compute_results(run) diff --git a/server/fishtest/schemas.py b/server/fishtest/schemas.py index c9b18bd9a..ddc7b9dcc 100644 --- a/server/fishtest/schemas.py +++ b/server/fishtest/schemas.py @@ -62,6 +62,7 @@ even = div(2, name="even") datetime_utc = intersect(datetime, fields({"tzinfo": timezone.utc})) gzip_data = magic("application/gzip", name="gzip_data") +residual_color = set_name(union("green", "yellow", "red"), "residual_color") uint = intersect(int, ge(0)) suint = intersect(int, gt(0)) @@ -640,7 +641,7 @@ def flags_must_match(run): # about non-validation of runs created with the prior # schema. -RUN_VERSION = 6 +RUN_VERSION = 7 runs_schema = intersect( { @@ -762,8 +763,6 @@ def flags_must_match(run): "active": bool, "last_updated": datetime_utc, "start": uint, - "residual?": number, - "residual_color?": str, "bad?": True, "stats": results_schema, "worker_info": worker_info_schema_runs, @@ -772,14 +771,14 @@ def flags_must_match(run): ), ..., ], - "bad_tasks?": [ + "bad_tasks": [ { "num_games": intersect(uint, even), "active": False, "last_updated": datetime_utc, "start": uint, "residual": number, - "residual_color": str, + "residual_color": residual_color, "bad": True, "task_id": task_id, "stats": results_schema, diff --git a/server/fishtest/templates/tasks.mak b/server/fishtest/templates/tasks.mak index 62b71d04b..5302bff4f 100644 --- a/server/fishtest/templates/tasks.mak +++ b/server/fishtest/templates/tasks.mak @@ -1,5 +1,5 @@ <%! - from fishtest.util import worker_name + from fishtest.util import worker_name, display_residual %> % for idx, task in enumerate(run['tasks'] + run.get('bad_tasks', [])): @@ -70,9 +70,12 @@ ${stats.get('time_losses', '-')} % if 'spsa' not in run['args']: - % if 'residual' in task and task['residual']!=float("inf"): - - ${f"{task['residual']:.3f}"} + <% + d=display_residual(task, chi2) + %> + % if d['residual']!=float("inf"): + + ${f"{d['residual']:.3f}"} % else: - diff --git a/server/fishtest/util.py b/server/fishtest/util.py index 37da5d9e0..eea12a311 100644 --- a/server/fishtest/util.py +++ b/server/fishtest/util.py @@ -209,30 +209,44 @@ def get_bad_workers(tasks, cached_chi2=None, p=0.001, res=7.0, iters=1): return bad_workers -def update_residuals(tasks, cached_chi2=None): - # If we have an up-to-date result of get_chi2() we can pass - # it as cached_chi2 to avoid needless recomputation. - chi2 = get_chi2(tasks) if cached_chi2 is None else cached_chi2 - residuals = chi2["residual"] +def residual_to_color(residual, chi2): + if abs(residual) < chi2["z_95"]: + return "green" + elif abs(residual) < chi2["z_99"]: + return "yellow" + else: + return "red" - for task in tasks: - if "bad" in task: - continue - if "worker_info" not in task: - continue - task["residual"] = residuals.get( - task["worker_info"]["unique_key"], float("inf") - ) +def display_residual(task, chi2): + if "bad" in task and "residual" in task: + residual = task["residual"] + residual_color = task["residual_color"] + else: if crash_or_time(task): - task["residual"] = 10.0 - task["residual_color"] = "#FF6A6A" - elif abs(task["residual"]) < chi2["z_95"]: - task["residual_color"] = "#44EB44" - elif abs(task["residual"]) < chi2["z_99"]: - task["residual_color"] = "yellow" + residual = 10.0 + residual_color = "red" else: - task["residual_color"] = "#FF6A6A" + residual = chi2["residual"].get( + task["worker_info"]["unique_key"], float("inf") + ) + residual_color = residual_to_color(residual, chi2) + + display_colors = { + "green": "#44EB44", + "yellow": "yellow", + "red": "#FF6A6A", + "#44EB44": "#44EB44", # legacy bad tasks + "#FF6A6A": "#FF6A6A", + } + + display_color = display_colors[residual_color] + + return { + "residual": residual, + "residual_color": residual_color, + "display_color": display_color, + } def format_bounds(elo_model, elo0, elo1): diff --git a/server/fishtest/views.py b/server/fishtest/views.py index e560520c2..21f0c54dd 100644 --- a/server/fishtest/views.py +++ b/server/fishtest/views.py @@ -23,7 +23,6 @@ github_repo_valid, is_sprt_ltc_data, password_strength, - update_residuals, ) from pyramid.httpexceptions import HTTPFound, HTTPNotFound from pyramid.security import forget, remember @@ -1461,6 +1460,7 @@ def tests_tasks(request): run = request.rundb.get_run(request.matchdict["id"]) if run is None: raise HTTPNotFound() + chi2 = get_chi2(run["tasks"]) try: show_task = int(request.params.get("show_task", -1)) @@ -1473,6 +1473,7 @@ def tests_tasks(request): "run": run, "approver": request.has_permission("approve_run"), "show_task": show_task, + "chi2": chi2, } @@ -1603,7 +1604,6 @@ def tests_view(request): cores += task["worker_info"]["concurrency"] chi2 = get_chi2(run["tasks"]) - update_residuals(run["tasks"], cached_chi2=chi2) try: show_task = int(request.params.get("show_task", -1))