Skip to content

Commit

Permalink
view: drain view builder before database
Browse files Browse the repository at this point in the history
The view builder performs write operations against the database.
In order for the view builder to shut down gracefully without errors, we
need to ensure the database can still handle writes while the view builder
is being drained. This commit changes the drain order so that the view
builder is drained before the database shuts down.

Fixes scylladb#18929

Closes scylladb#19609
  • Loading branch information
mlitvk authored and xemul committed Jul 5, 2024
1 parent 103bd83 commit 407274e
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
2 changes: 1 addition & 1 deletion service/storage_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4505,10 +4505,10 @@ future<> storage_service::do_drain() {
return bm.drain();
});

co_await _view_builder.invoke_on_all(&db::view::view_builder::drain);
co_await _db.invoke_on_all(&replica::database::drain);
co_await _sys_ks.invoke_on_all(&db::system_keyspace::shutdown);
co_await _repair.invoke_on_all(&repair_service::shutdown);
co_await _view_builder.invoke_on_all(&db::view::view_builder::drain);
}

future<> storage_service::do_cluster_cleanup() {
Expand Down
35 changes: 35 additions & 0 deletions test/topology_custom/test_mv_fail_building.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,38 @@ async def test_mv_fail_building(manager: ManagerClient) -> None:
assert sorted(rows) == sorted(base_rows)

await cql.run_async(f"DROP KEYSPACE ks")

# Reproduces #18929
# Test view build operations running during node shutdown and view drain.
# Verify the drain order is correct and the view build doesn't fail with
# database write failures.
@pytest.mark.asyncio
@skip_mode('release', "error injections aren't enabled in release mode")
async def test_mv_build_during_shutdown(manager: ManagerClient) -> None:
    """Stop a node gracefully while two view builds are pending and check its log.

    The test passes when no "exception during mutation write" line appears in
    the server log between the mark taken just before shutdown and the end of
    the graceful stop.
    """
    server = await manager.server_add()

    cql = manager.get_cql()
    await cql.run_async("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}")
    await cql.run_async("CREATE TABLE ks.t (pk int primary key, v int)")

    # Populate the base table so the views have rows to build from.
    for i in range(100):
        await cql.run_async(f"insert into ks.t (pk, v) values ({i}, {i+1})")

    # Start building two views. The first is delayed by the injection, and the second
    # view build is queued, waiting on the view builder semaphore.
    await manager.api.enable_injection(server.ip_addr, "delay_before_get_view_natural_endpoint", one_shot=True)
    # NOTE(review): the two CREATE statements below are not awaited, presumably
    # so the test can move on to the shutdown while the builds are still
    # pending - confirm this is intentional.
    cql.run_async("CREATE materialized view ks.t_view1 AS select pk, v from ks.t where v is not null primary key (v, pk)")
    cql.run_async("CREATE materialized view ks.t_view2 AS select pk, v from ks.t where v is not null primary key (v, pk)")

    # Mark the current log position so that only lines written from here on
    # are inspected by the grep below.
    log = await manager.server_open_log(server.server_id)
    mark = await log.mark()

    # Start node shutdown. This will drain and abort the running view build.
    # As we continue and drain the view building of view1 and view2 we will
    # have writes to the database, running during the draining phase.
    # If the drain order is correct it should succeed without errors.
    await manager.server_stop_gracefully(server.server_id)

    # Verify no db write errors during the shutdown
    occurrences = await log.grep(expr="exception during mutation write", from_mark=mark)
    assert len(occurrences) == 0

0 comments on commit 407274e

Please sign in to comment.