Skip to content

Commit

Permalink
roachtest: reset failures on vm preemption
Browse files Browse the repository at this point in the history
This change resets failures in the case of a vm preemption,
in case a timeout occurred which normally takes precedence
over all other failures. While a timeout suggests that
something should be fixed with the test (usually respecting
the test context cancellation), we see that in practice,
engineers tend to close the issue without investigating as
soon as they see the preemption.

This also removes the potential duplicate vm_preemption
failure that may have been added by the preemption polling.
  • Loading branch information
DarrylWong committed Jan 30, 2025
1 parent 2ff9ee5 commit 9885bd9
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
3 changes: 3 additions & 0 deletions pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1164,6 +1164,9 @@ func (r *testRunner) runTest(
// Note that this error message is referred for test selection in
// pkg/cmd/roachtest/testselector/snowflake_query.sql.
failureMsg = fmt.Sprintf("VMs preempted during the test run: %s\n\n**Other Failures:**\n%s", preemptedVMNames, failureMsg)
// Reset the failures as a timeout may have suppressed failures, but we
// want to propagate the preemption error and avoid creating an issue.
t.resetFailures()
t.Error(vmPreemptionError(preemptedVMNames))
}
hostErrorVMNames := getHostErrorVMNames(ctx, c, l)
Expand Down
25 changes: 25 additions & 0 deletions pkg/cmd/roachtest/test_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -800,4 +800,29 @@ func TestVMPreemptionPolling(t *testing.T) {

require.NoError(t, err)
})

// Test that if the test hangs until timeout, a VM preemption will still be caught.
t.Run("test hangs and still catches preemption", func(t *testing.T) {
// We don't want the polling to cancel the test early.
setPollPreemptionInterval(10 * time.Minute)
getPreemptedVMsHook = func(c cluster.Cluster, ctx context.Context, l *logger.Logger) ([]vm.PreemptedVM, error) {
preemptedVMs := []vm.PreemptedVM{{
Name: "test_node",
PreemptedAt: time.Now(),
}}
return preemptedVMs, nil
}

mockTest.Run = func(ctx context.Context, t test.Test, c cluster.Cluster) {
for {
<-ctx.Done()
}
}
mockTest.Timeout = 10 * time.Millisecond

err := runner.Run(ctx, []registry.TestSpec{mockTest}, 1, /* count */
defaultParallelism, copt, testOpts{}, lopt)

require.NoError(t, err)
})
}

0 comments on commit 9885bd9

Please sign in to comment.