Skip to content

Commit 656f49d

Browse files
authored
Ensure regression tasks don't start new tasks when not making progress (#4705)
This returns a bad-build error type after a regression task, when a known min/max input pair didn't shrink. Previously, a timeout error type was returned, after which post-process respawns the task with the same min/max pair, leading to a task loop. This doesn't change behavior when either min or max are not known (e.g. when tasks are initially started) or when some progress is made (e.g. at least one improvement to min or max during several hours this task can run) or when other error conditions happen. Chrome bug: https://crbug.com/396344382
1 parent 0f590af commit 656f49d

File tree

2 files changed

+66
-6
lines changed

2 files changed

+66
-6
lines changed

src/clusterfuzz/_internal/bot/tasks/utasks/regression_task.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,14 @@ def find_regression_range(
414414
error_type=uworker_msg_pb2.ErrorType.REGRESSION_REVISION_LIST_ERROR) # pylint: disable=no-member
415415

416416
# Pick up where left off in a previous run if necessary.
417-
min_revision = testcase.get_metadata('last_regression_min')
418-
max_revision = testcase.get_metadata('last_regression_max')
417+
# Cache this data here to judge in the end if we actually made progress.
418+
# Between here and the end of the loop also a lot of time might pass, in
419+
# which another simultaneously running regression task might mess with
420+
# the metadata.
421+
last_min_revision = testcase.get_metadata('last_regression_min')
422+
last_max_revision = testcase.get_metadata('last_regression_max')
423+
min_revision = last_min_revision
424+
max_revision = last_max_revision
419425

420426
logs.info('Build set up, starting search for regression range. State: ' +
421427
f'crash_revision = {testcase.crash_revision}, ' +
@@ -548,13 +554,27 @@ def find_regression_range(
548554
regression_task_output.last_regression_min = revision_list[min_index]
549555
regression_task_output.last_regression_max = revision_list[max_index]
550556

551-
# If we've broken out of the above loop, we timed out. We'll finish by
552-
# running another regression task and picking up from this point.
557+
# If we've broken out of the above loop, we timed out. Remember where
558+
# we left.
559+
regression_task_output.last_regression_min = revision_list[min_index]
560+
regression_task_output.last_regression_max = revision_list[max_index]
561+
562+
# Check if we made progress at all. If this task already resumed a previous
563+
# timeout, it started with known min/max revisions. Without any progress,
564+
# likely most builds failed the bad build check, in which case we don't
565+
# want to restart another task to avoid a task loop.
566+
if (last_min_revision == revision_list[min_index] and
567+
last_max_revision == revision_list[max_index]):
568+
return uworker_msg_pb2.Output( # pylint: disable=no-member
569+
regression_task_output=regression_task_output,
570+
error_type=uworker_msg_pb2.REGRESSION_BAD_BUILD_ERROR, # pylint: disable=no-member
571+
error_message='No progress during bisect.')
572+
573+
# Because we made progress, the timeout error handler will trigger another
574+
# regression task and pick up from this point.
553575
# TODO: Error handling should be moved to postprocess.
554576
error_message = 'Timed out, current range r%d:r%d' % (
555577
revision_list[min_index], revision_list[max_index])
556-
regression_task_output.last_regression_min = revision_list[min_index]
557-
regression_task_output.last_regression_max = revision_list[max_index]
558578
return uworker_msg_pb2.Output( # pylint: disable=no-member
559579
regression_task_output=regression_task_output,
560580
error_type=uworker_msg_pb2.REGRESSION_TIMEOUT_ERROR, # pylint: disable=no-member

src/clusterfuzz/_internal/tests/core/bot/tasks/utasks/regression_task_test.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,46 @@ def reproduces(revision):
10141014
self.assertEqual(output.regression_task_output.regression_range_start, 50)
10151015
self.assertEqual(output.regression_task_output.regression_range_end, 100)
10161016

1017+
def test_timeout_bisect_no_progress(self):
1018+
"""Verifies that bisection without progress will terminate."""
1019+
self.mock.get_revisions_list.return_value = list(range(0, 102, 2))
1020+
self.deadline = 5.
1021+
1022+
def repros(revision):
1023+
self.mock_time += 1.
1024+
1025+
if revision < 68:
1026+
# Let every revision except the max revision have bad build errors.
1027+
return False, uworker_msg_pb2.Output(
1028+
error_type=uworker_msg_pb2.REGRESSION_BAD_BUILD_ERROR)
1029+
return True, None
1030+
1031+
self.reproduces_in_revision = repros
1032+
1033+
testcase = test_utils.create_generic_testcase()
1034+
testcase.crash_revision = 100
1035+
1036+
# Pick up an unfinished task.
1037+
testcase.set_metadata('last_regression_max', 68)
1038+
testcase.set_metadata('last_regression_min', 36)
1039+
1040+
uworker_input = uworker_msg_pb2.Input(
1041+
testcase_id=str(testcase.key.id()),
1042+
testcase=uworker_io.entity_to_protobuf(testcase),
1043+
job_type='foo-job',
1044+
setup_input=uworker_msg_pb2.SetupInput(),
1045+
regression_task_input=uworker_msg_pb2.RegressionTaskInput(),
1046+
)
1047+
1048+
output = regression_task.utask_main(uworker_input)
1049+
1050+
self.assertEqual(output.error_type,
1051+
uworker_msg_pb2.REGRESSION_BAD_BUILD_ERROR)
1052+
1053+
# The task made no progress.
1054+
self.assertEqual(output.regression_task_output.last_regression_max, 68)
1055+
self.assertEqual(output.regression_task_output.last_regression_min, 36)
1056+
10171057
def test_inconsistent_state_max_none_min_not_none(self):
10181058
"""Verifies that when last_regression_max is None and last_regression_min
10191059
is not None, we ignore the latter and restart from scratch."""

0 commit comments

Comments
 (0)