Skip to content

Commit

Permalink
storage_controller: rename failpoint and make it pausable
Browse files Browse the repository at this point in the history
The same failpoint is used by a new test in a follow-up commit,
and that test needs the failpoint to be pausable.
  • Loading branch information
VladLazar committed Jan 7, 2025
1 parent be38123 commit d3fa0f6
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
3 changes: 1 addition & 2 deletions storage_controller/src/reconciler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken;
use utils::backoff::exponential_backoff;
use utils::failpoint_support;
use utils::generation::Generation;
use utils::id::{NodeId, TimelineId};
use utils::lsn::Lsn;
Expand Down Expand Up @@ -824,7 +823,7 @@ impl Reconciler {
.handle_detach(self.tenant_shard_id, self.shard.stripe_size);
}

failpoint_support::sleep_millis_async!("sleep-on-reconcile-epilogue");
pausable_failpoint!("reconciler-epilogue");

Ok(())
}
Expand Down
11 changes: 10 additions & 1 deletion test_runner/regress/test_storage_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2406,7 +2406,14 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
env.storage_controller.tenant_create(tid)

env.storage_controller.reconcile_until_idle()
env.storage_controller.configure_failpoints(("sleep-on-reconcile-epilogue", "return(10000)"))
env.storage_controller.configure_failpoints(("reconciler-epilogue", "pause"))

def unpause_failpoint():
time.sleep(2)
env.storage_controller.configure_failpoints(("reconciler-epilogue", "off"))

thread = threading.Thread(target=unpause_failpoint)
thread.start()

# Make a change to the tenant config to trigger a slow reconcile
virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
Expand All @@ -2421,6 +2428,8 @@ def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder):
observed_state = env.storage_controller.step_down()
log.info(f"Storage controller stepped down with {observed_state=}")

thread.join()

# Validate that we waited for the slow reconcile to complete
# and updated the observed state in the storcon before stepping down.
node_id = str(env.pageserver.id)
Expand Down

0 comments on commit d3fa0f6

Please sign in to comment.