Skip to content

Commit

Permalink
Don't save the residuals and their colors in the db.
Browse files Browse the repository at this point in the history
The main reason for this PR is that currently the residuals
are updated in tests_view. This is wrong. tests_view is a
GET request. It is not allowed to change the server state.

So if we want updated residuals then we need to update
them during POST requests, such as update_task and failed_task.
However this may be CPU intensive if a fleet of workers
disconnects. We could throttle the updates but this requires
some extra state that needs to be maintained (the time of the last
update).

So the alternative is to compute the residuals on the fly when
they are displayed. This is what this PR does. This solves for
example the issue #1948. It should now again be possible to
serve the tasks from a secondary instance.

Computing residuals on the fly is not possible for bad tasks.
So for bad tasks we record the residual and its color from the time
they became bad. However the color is saved symbolically as
green, yellow, red, instead of as an actual hex-code.
  • Loading branch information
vdbergh authored and ppigazzini committed Jan 12, 2025
1 parent dc7aada commit d9a9750
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 42 deletions.
2 changes: 1 addition & 1 deletion server/fishtest/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ def get_task(self):
task["last_updated"] = str(task["last_updated"])
if "residual" in task:
# json does not know about infinity
if task["residual"] == float("inf"):
if task.get("residual", None) == float("inf"):
task["residual"] = "inf"
return task

Expand Down
27 changes: 17 additions & 10 deletions server/fishtest/rundb.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
get_hash,
get_tc_ratio,
remaining_hours,
update_residuals,
residual_to_color,
worker_name,
)
from fishtest.workerdb import WorkerDb
Expand Down Expand Up @@ -554,7 +554,8 @@ def new_run(
"tasks": [],
"approved": False,
"approver": "",
# Aggregated data
"bad_tasks": [],
# Aggregated results
"results": {
"wins": 0,
"losses": 0,
Expand Down Expand Up @@ -1630,7 +1631,11 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1):
if run.get("failed", False):
return "You cannot purge a failed run"
message = "No bad workers"
# Transfer bad tasks to run["bad_tasks"]

# The following is necessary to be able to purge
# old runs. It can be deleted after 30 days
# (cfr above).

if "bad_tasks" not in run:
run["bad_tasks"] = []

Expand All @@ -1642,14 +1647,10 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1):
# Special cases: crashes or time losses.
if crash_or_time(task):
message = ""
# The residual or residual color my not have been set yet
self.set_bad_task(task_id, run, residual=10.0, residual_color="#FF6A6A")
# The residual or residual color may not have been set yet
self.set_bad_task(task_id, run, residual=10.0, residual_color="red")

chi2 = get_chi2(run["tasks"])
# Make sure the residuals are up to date.
# Once a task is moved to run["bad_tasks"] its
# residual will no longer change.
update_residuals(run["tasks"], cached_chi2=chi2)
bad_workers = get_bad_workers(
run["tasks"],
cached_chi2=chi2,
Expand All @@ -1663,7 +1664,13 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1):
continue
if task["worker_info"]["unique_key"] in bad_workers:
message = ""
self.set_bad_task(task_id, run)
residual = chi2["residual"].get(
task["worker_info"]["unique_key"], float("inf")
)
residual_color = residual_to_color(residual, chi2)
self.set_bad_task(
task_id, run, residual=residual, residual_color=residual_color
)

if message == "":
results = compute_results(run)
Expand Down
9 changes: 4 additions & 5 deletions server/fishtest/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
even = div(2, name="even")
datetime_utc = intersect(datetime, fields({"tzinfo": timezone.utc}))
gzip_data = magic("application/gzip", name="gzip_data")
residual_color = set_name(union("green", "yellow", "red"), "residual_color")

uint = intersect(int, ge(0))
suint = intersect(int, gt(0))
Expand Down Expand Up @@ -640,7 +641,7 @@ def flags_must_match(run):
# about non-validation of runs created with the prior
# schema.

RUN_VERSION = 6
RUN_VERSION = 7

runs_schema = intersect(
{
Expand Down Expand Up @@ -762,8 +763,6 @@ def flags_must_match(run):
"active": bool,
"last_updated": datetime_utc,
"start": uint,
"residual?": number,
"residual_color?": str,
"bad?": True,
"stats": results_schema,
"worker_info": worker_info_schema_runs,
Expand All @@ -772,14 +771,14 @@ def flags_must_match(run):
),
...,
],
"bad_tasks?": [
"bad_tasks": [
{
"num_games": intersect(uint, even),
"active": False,
"last_updated": datetime_utc,
"start": uint,
"residual": number,
"residual_color": str,
"residual_color": residual_color,
"bad": True,
"task_id": task_id,
"stats": results_schema,
Expand Down
11 changes: 7 additions & 4 deletions server/fishtest/templates/tasks.mak
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<%!
from fishtest.util import worker_name
from fishtest.util import worker_name, display_residual
%>

% for idx, task in enumerate(run['tasks'] + run.get('bad_tasks', [])):
Expand Down Expand Up @@ -70,9 +70,12 @@
<td>${stats.get('time_losses', '-')}</td>

% if 'spsa' not in run['args']:
% if 'residual' in task and task['residual']!=float("inf"):
<td style="background-color:${task['residual_color']}">
${f"{task['residual']:.3f}"}
<%
d=display_residual(task, chi2)
%>
% if d['residual']!=float("inf"):
<td style="background-color:${d['display_color']}">
${f"{d['residual']:.3f}"}
</td>
% else:
<td>-</td>
Expand Down
54 changes: 34 additions & 20 deletions server/fishtest/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,30 +209,44 @@ def get_bad_workers(tasks, cached_chi2=None, p=0.001, res=7.0, iters=1):
return bad_workers


def update_residuals(tasks, cached_chi2=None):
# If we have an up-to-date result of get_chi2() we can pass
# it as cached_chi2 to avoid needless recomputation.
chi2 = get_chi2(tasks) if cached_chi2 is None else cached_chi2
residuals = chi2["residual"]
def residual_to_color(residual, chi2):
if abs(residual) < chi2["z_95"]:
return "green"
elif abs(residual) < chi2["z_99"]:
return "yellow"
else:
return "red"

for task in tasks:
if "bad" in task:
continue
if "worker_info" not in task:
continue
task["residual"] = residuals.get(
task["worker_info"]["unique_key"], float("inf")
)

def display_residual(task, chi2):
if "bad" in task and "residual" in task:
residual = task["residual"]
residual_color = task["residual_color"]
else:
if crash_or_time(task):
task["residual"] = 10.0
task["residual_color"] = "#FF6A6A"
elif abs(task["residual"]) < chi2["z_95"]:
task["residual_color"] = "#44EB44"
elif abs(task["residual"]) < chi2["z_99"]:
task["residual_color"] = "yellow"
residual = 10.0
residual_color = "red"
else:
task["residual_color"] = "#FF6A6A"
residual = chi2["residual"].get(
task["worker_info"]["unique_key"], float("inf")
)
residual_color = residual_to_color(residual, chi2)

display_colors = {
"green": "#44EB44",
"yellow": "yellow",
"red": "#FF6A6A",
"#44EB44": "#44EB44", # legacy bad tasks
"#FF6A6A": "#FF6A6A",
}

display_color = display_colors[residual_color]

return {
"residual": residual,
"residual_color": residual_color,
"display_color": display_color,
}


def format_bounds(elo_model, elo0, elo1):
Expand Down
4 changes: 2 additions & 2 deletions server/fishtest/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
github_repo_valid,
is_sprt_ltc_data,
password_strength,
update_residuals,
)
from pyramid.httpexceptions import HTTPFound, HTTPNotFound
from pyramid.security import forget, remember
Expand Down Expand Up @@ -1461,6 +1460,7 @@ def tests_tasks(request):
run = request.rundb.get_run(request.matchdict["id"])
if run is None:
raise HTTPNotFound()
chi2 = get_chi2(run["tasks"])

try:
show_task = int(request.params.get("show_task", -1))
Expand All @@ -1473,6 +1473,7 @@ def tests_tasks(request):
"run": run,
"approver": request.has_permission("approve_run"),
"show_task": show_task,
"chi2": chi2,
}


Expand Down Expand Up @@ -1603,7 +1604,6 @@ def tests_view(request):
cores += task["worker_info"]["concurrency"]

chi2 = get_chi2(run["tasks"])
update_residuals(run["tasks"], cached_chi2=chi2)

try:
show_task = int(request.params.get("show_task", -1))
Expand Down

0 comments on commit d9a9750

Please sign in to comment.