Skip to content

Commit

Permalink
Don't save the residuals and their colors in the db.
Browse files Browse the repository at this point in the history
The main reason for this PR is that currently the residuals
are updated in tests_view. This is wrong. tests_view is a
GET request. It is not allowed to change the server state.

So if we want updated residuals then we need to update
them during POST requests, such as update_task and failed_task.
However this may be CPU intensive if a fleet of workers
disconnects. We could throttle the updates but this requires
some extra state that needs to be maintained (the time of the last
update).

So the alternative is to compute the residuals on the fly when
they are displayed. This is what this PR does. This solves for
example the issue #1948. It should now again be possible to
serve the tasks from a secondary instance.

Computing residuals on the fly is not possible for bad tasks.
So for bad tasks we record the residual and its color from the time
they became bad. However the color is saved symbolically as
green, yellow, red, instead of as an actual hex-code.
  • Loading branch information
vdbergh committed May 1, 2024
1 parent 7922914 commit 74cfbbe
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 43 deletions.
2 changes: 1 addition & 1 deletion server/fishtest/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def get_task(self):
task["last_updated"] = str(task["last_updated"])
if "residual" in task:
# json does not know about infinity
if task["residual"] == float("inf"):
if task.get("residual", None) == float("inf"):
task["residual"] = "inf"
return task

Expand Down
23 changes: 12 additions & 11 deletions server/fishtest/rundb.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
get_tc_ratio,
post_in_fishcooking_results,
remaining_hours,
update_residuals,
residual_to_color,
worker_name,
)
from fishtest.workerdb import WorkerDb
Expand Down Expand Up @@ -203,6 +203,7 @@ def new_run(
# Will be filled in by tasks, indexed by task-id.
# Starts as an empty list.
"tasks": [],
"bad_tasks": [],
# Aggregated results
"results": {
"wins": 0,
Expand Down Expand Up @@ -1425,8 +1426,6 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1):
return "You cannot purge a failed run"
message = "No bad workers"
# Transfer bad tasks to run["bad_tasks"]
if "bad_tasks" not in run:
run["bad_tasks"] = []

tasks = copy.copy(run["tasks"])
zero_stats = {
Expand All @@ -1445,11 +1444,8 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1):
if crash_or_time(task):
message = ""
bad_task = copy.deepcopy(task)
# The next two lines are a bit hacky but
# the correct residual and color may not have
# been set yet.
bad_task["residual"] = 10.0
bad_task["residual_color"] = "#FF6A6A"
bad_task["residual_color"] = "red"
bad_task["task_id"] = task_id
bad_task["bad"] = True
run["bad_tasks"].append(bad_task)
Expand All @@ -1464,10 +1460,6 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1):
task["stats"] = copy.deepcopy(zero_stats)

chi2 = get_chi2(run["tasks"])
# Make sure the residuals are up to date.
# Once a task is moved to run["bad_tasks"] its
# residual will no longer change.
update_residuals(run["tasks"], cached_chi2=chi2)
bad_workers = get_bad_workers(
run["tasks"],
cached_chi2=chi2,
Expand All @@ -1484,6 +1476,15 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1):
bad_task = copy.deepcopy(task)
bad_task["task_id"] = task_id
bad_task["bad"] = True
# Remember the residual!
# Once a task is moved to run["bad_tasks"] its
# residual will no longer change.
bad_task["residual"] = chi2["residual"].get(
task["worker_info"]["unique_key"], float("inf")
)
bad_task["residual_color"] = residual_to_color(
bad_task["residual"], chi2
)
run["bad_tasks"].append(bad_task)
task["bad"] = True
task["active"] = False
Expand Down
6 changes: 2 additions & 4 deletions server/fishtest/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,8 +622,6 @@ def final_results_must_match(run):
"active": bool,
"last_updated": datetime_utc,
"start": uint,
"residual?": number,
"residual_color?": str,
"bad?": True,
"stats": results_schema,
"worker_info": worker_info_schema_runs,
Expand All @@ -632,14 +630,14 @@ def final_results_must_match(run):
),
...,
],
"bad_tasks?": [
"bad_tasks": [
{
"num_games": intersect(uint, even),
"active": False,
"last_updated": datetime_utc,
"start": uint,
"residual": number,
"residual_color": str,
"residual_color": union("green", "yellow", "red"),
"bad": True,
"task_id": uint,
"stats": results_schema,
Expand Down
11 changes: 7 additions & 4 deletions server/fishtest/templates/tasks.mak
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<%!
from fishtest.util import worker_name
from fishtest.util import worker_name, display_residual
%>

% for idx, task in enumerate(run['tasks'] + run.get('bad_tasks', [])):
Expand Down Expand Up @@ -70,9 +70,12 @@
<td>${stats.get('time_losses', '-')}</td>

% if 'spsa' not in run['args']:
% if 'residual' in task and task['residual']!=float("inf"):
<td style="background-color:${task['residual_color']}">
${f"{task['residual']:.3f}"}
<%
d=display_residual(task, chi2)
%>
% if d['residual']!=float("inf"):
<td style="background-color:${d['display_color']}">
${f"{d['residual']:.3f}"}
</td>
% else:
<td>-</td>
Expand Down
54 changes: 34 additions & 20 deletions server/fishtest/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,30 +208,44 @@ def get_bad_workers(tasks, cached_chi2=None, p=0.001, res=7.0, iters=1):
return bad_workers


def update_residuals(tasks, cached_chi2=None):
# If we have an up-to-date result of get_chi2() we can pass
# it as cached_chi2 to avoid needless recomputation.
chi2 = get_chi2(tasks) if cached_chi2 is None else cached_chi2
residuals = chi2["residual"]
def residual_to_color(residual, chi2):
if abs(residual) < chi2["z_95"]:
return "green"
elif abs(residual) < chi2["z_99"]:
return "yellow"
else:
return "red"

for task in tasks:
if "bad" in task:
continue
if "worker_info" not in task:
continue
task["residual"] = residuals.get(
task["worker_info"]["unique_key"], float("inf")
)

def display_residual(task, chi2):
if "bad" in task and "residual" in task:
residual = task["residual"]
residual_color = task["residual_color"]
else:
if crash_or_time(task):
task["residual"] = 10.0
task["residual_color"] = "#FF6A6A"
elif abs(task["residual"]) < chi2["z_95"]:
task["residual_color"] = "#44EB44"
elif abs(task["residual"]) < chi2["z_99"]:
task["residual_color"] = "yellow"
residual = 10.0
residual_color = "red"
else:
task["residual_color"] = "#FF6A6A"
residual = chi2["residual"].get(
task["worker_info"]["unique_key"], float("inf")
)
residual_color = residual_to_color(residual, chi2)

display_colors = {
"green": "#44EB44",
"yellow": "yellow",
"red": "#FF6A6A",
"#44EB44": "#44EB44", # legacy bad tasks
"#FF6A6A": "#FF6A6A",
}

display_color = display_colors[residual_color]

return {
"residual": residual,
"residual_color": residual_color,
"display_color": display_color,
}


def format_bounds(elo_model, elo0, elo1):
Expand Down
5 changes: 2 additions & 3 deletions server/fishtest/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
get_hash,
get_tc_ratio,
password_strength,
update_residuals,
)
from pyramid.httpexceptions import HTTPFound, exception_response
from pyramid.security import forget, remember
Expand Down Expand Up @@ -1421,7 +1420,7 @@ def tests_tasks(request):
run = request.rundb.get_run(request.matchdict["id"])
if run is None:
raise exception_response(404)

chi2 = get_chi2(run["tasks"])
try:
show_task = int(request.params.get("show_task", -1))
except:
Expand All @@ -1433,6 +1432,7 @@ def tests_tasks(request):
"run": run,
"approver": request.has_permission("approve_run"),
"show_task": show_task,
"chi2": chi2,
}


Expand Down Expand Up @@ -1565,7 +1565,6 @@ def tests_view(request):
task.setdefault("last_updated", datetime.min.replace(tzinfo=timezone.utc))

chi2 = get_chi2(run["tasks"])
update_residuals(run["tasks"], cached_chi2=chi2)

try:
show_task = int(request.params.get("show_task", -1))
Expand Down

0 comments on commit 74cfbbe

Please sign in to comment.