From fa402bbb189d862071352bda765358011b9d27d9 Mon Sep 17 00:00:00 2001 From: Michel Van den Bergh Date: Wed, 1 May 2024 01:45:48 +0000 Subject: [PATCH] Don't save the residuals and their colors in the db. The main reason for this PR is that currently the residuals are updated in tests_view. This is wrong. tests_view is a GET request. It is not allowed to change the server state. So if we want updated residuals then we need to update them during POST requests, such as update_task and failed_task. However this may be CPU intensive if a fleet of workers disconnects. We could throttle the updates but this requires some extra state that needs to be maintained (the time of the last update). So the alternative is to compute the residuals on the fly when they are displayed. This is what this PR does. This solves for example the issue #1948. It should now again be possible to serve the tasks from a secondary instance. Computing residuals on the fly is not possible for bad tasks. So for bad tasks record the residual and its color from the time they became bad. However the color is saved symbolically as green, yellow, red, instead of as an actual hex-code. --- server/fishtest/api.py | 2 +- server/fishtest/rundb.py | 23 ++++++------ server/fishtest/schemas.py | 6 ++-- server/fishtest/templates/tasks.mak | 11 +++--- server/fishtest/util.py | 54 ++++++++++++++++++----------- server/fishtest/views.py | 5 ++- 6 files changed, 58 insertions(+), 43 deletions(-) diff --git a/server/fishtest/api.py b/server/fishtest/api.py index e01a3ae474..57d1a088e5 100644 --- a/server/fishtest/api.py +++ b/server/fishtest/api.py @@ -331,7 +331,7 @@ def get_task(self): task["last_updated"] = str(task["last_updated"]) if "residual" in task: # json does not know about infinity - if task["residual"] == float("inf"): + if task.get("residual", None) == float("inf"): task["residual"] = "inf" return task diff --git a/server/fishtest/rundb.py b/server/fishtest/rundb.py index edaa59c7a3..d5524afe27 100644 --- a/server/fishtest/rundb.py +++ b/server/fishtest/rundb.py @@ -31,7 +31,7 @@ get_tc_ratio, post_in_fishcooking_results, remaining_hours, - update_residuals, + residual_to_color, worker_name, ) from fishtest.workerdb import WorkerDb @@ -203,6 +203,7 @@ def new_run( # Will be filled in by tasks, indexed by task-id. # Starts as an empty list. "tasks": [], + "bad_tasks": [], # Aggregated results "results": { "wins": 0, @@ -1425,8 +1426,6 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1): return "You cannot purge a failed run" message = "No bad workers" # Transfer bad tasks to run["bad_tasks"] - if "bad_tasks" not in run: - run["bad_tasks"] = [] tasks = copy.copy(run["tasks"]) zero_stats = { @@ -1445,11 +1444,8 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1): if crash_or_time(task): message = "" bad_task = copy.deepcopy(task) - # The next two lines are a bit hacky but - # the correct residual and color may not have - # been set yet. bad_task["residual"] = 10.0 - bad_task["residual_color"] = "#FF6A6A" + bad_task["residual_color"] = "red" bad_task["task_id"] = task_id bad_task["bad"] = True run["bad_tasks"].append(bad_task) @@ -1464,10 +1460,6 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1): task["stats"] = copy.deepcopy(zero_stats) chi2 = get_chi2(run["tasks"]) - # Make sure the residuals are up to date. - # Once a task is moved to run["bad_tasks"] its - # residual will no longer change. - update_residuals(run["tasks"], cached_chi2=chi2) bad_workers = get_bad_workers( run["tasks"], cached_chi2=chi2, @@ -1484,6 +1476,15 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1): bad_task = copy.deepcopy(task) bad_task["task_id"] = task_id bad_task["bad"] = True + # Remember the residual! + # Once a task is moved to run["bad_tasks"] its + # residual will no longer change. + bad_task["residual"] = chi2["residual"].get( + task["worker_info"]["unique_key"], float("inf") + ) + bad_task["residual_color"] = residual_to_color( + bad_task["residual"], chi2 + ) run["bad_tasks"].append(bad_task) task["bad"] = True task["active"] = False diff --git a/server/fishtest/schemas.py b/server/fishtest/schemas.py index 13bab4a191..46145cae03 100644 --- a/server/fishtest/schemas.py +++ b/server/fishtest/schemas.py @@ -622,8 +622,6 @@ def final_results_must_match(run): "active": bool, "last_updated": datetime_utc, "start": uint, - "residual?": number, - "residual_color?": str, "bad?": True, "stats": results_schema, "worker_info": worker_info_schema_runs, @@ -632,14 +630,14 @@ def final_results_must_match(run): ), ..., ], - "bad_tasks?": [ + "bad_tasks": [ { "num_games": intersect(uint, even), "active": False, "last_updated": datetime_utc, "start": uint, "residual": number, - "residual_color": str, + "residual_color": union("green", "yellow", "red"), "bad": True, "task_id": uint, "stats": results_schema, diff --git a/server/fishtest/templates/tasks.mak b/server/fishtest/templates/tasks.mak index 62b71d04b4..5302bff4f6 100644 --- a/server/fishtest/templates/tasks.mak +++ b/server/fishtest/templates/tasks.mak @@ -1,5 +1,5 @@ <%! - from fishtest.util import worker_name + from fishtest.util import worker_name, display_residual %> % for idx, task in enumerate(run['tasks'] + run.get('bad_tasks', [])): @@ -70,9 +70,12 @@ ${stats.get('time_losses', '-')} % if 'spsa' not in run['args']: - % if 'residual' in task and task['residual']!=float("inf"): - - ${f"{task['residual']:.3f}"} + <% + d=display_residual(task, chi2) + %> + % if d['residual']!=float("inf"): + + ${f"{d['residual']:.3f}"} % else: - diff --git a/server/fishtest/util.py b/server/fishtest/util.py index b7ea591af4..a280298dec 100644 --- a/server/fishtest/util.py +++ b/server/fishtest/util.py @@ -208,30 +208,44 @@ def get_bad_workers(tasks, cached_chi2=None, p=0.001, res=7.0, iters=1): return bad_workers -def update_residuals(tasks, cached_chi2=None): - # If we have an up-to-date result of get_chi2() we can pass - # it as cached_chi2 to avoid needless recomputation. - chi2 = get_chi2(tasks) if cached_chi2 is None else cached_chi2 - residuals = chi2["residual"] +def residual_to_color(residual, chi2): + if abs(residual) < chi2["z_95"]: + return "green" + elif abs(residual) < chi2["z_99"]: + return "yellow" + else: + return "red" - for task in tasks: - if "bad" in task: - continue - if "worker_info" not in task: - continue - task["residual"] = residuals.get( - task["worker_info"]["unique_key"], float("inf") - ) +def display_residual(task, chi2): + if "bad" in task and "residual" in task: + residual = task["residual"] + residual_color = task["residual_color"] + else: if crash_or_time(task): - task["residual"] = 10.0 - task["residual_color"] = "#FF6A6A" - elif abs(task["residual"]) < chi2["z_95"]: - task["residual_color"] = "#44EB44" - elif abs(task["residual"]) < chi2["z_99"]: - task["residual_color"] = "yellow" + residual = 10.0 + residual_color = "red" else: - task["residual_color"] = "#FF6A6A" + residual = chi2["residual"].get( + task["worker_info"]["unique_key"], float("inf") + ) + residual_color = residual_to_color(residual, chi2) + + display_colors = { + "green": "#44EB44", + "yellow": "yellow", + "red": "#FF6A6A", + "#44EB44": "#44EB44", # legacy bad tasks + "#FF6A6A": "#FF6A6A", + } + + display_color = display_colors[residual_color] + + return { + "residual": residual, + "residual_color": residual_color, + "display_color": display_color, + } def format_bounds(elo_model, elo0, elo1): diff --git a/server/fishtest/views.py b/server/fishtest/views.py index f0191d284c..2486001838 100644 --- a/server/fishtest/views.py +++ b/server/fishtest/views.py @@ -20,7 +20,6 @@ get_hash, get_tc_ratio, password_strength, - update_residuals, ) from pyramid.httpexceptions import HTTPFound, exception_response from pyramid.security import forget, remember @@ -1421,7 +1420,7 @@ def tests_tasks(request): run = request.rundb.get_run(request.matchdict["id"]) if run is None: raise exception_response(404) - + chi2 = get_chi2(run["tasks"]) try: show_task = int(request.params.get("show_task", -1)) except: @@ -1433,6 +1432,7 @@ def tests_tasks(request): "run": run, "approver": request.has_permission("approve_run"), "show_task": show_task, + "chi2": chi2, } @@ -1565,7 +1565,6 @@ def tests_view(request): task.setdefault("last_updated", datetime.min.replace(tzinfo=timezone.utc)) chi2 = get_chi2(run["tasks"]) - update_residuals(run["tasks"], cached_chi2=chi2) try: show_task = int(request.params.get("show_task", -1))