Skip to content

Commit 52b3843

Browse files
Merge pull request #12117 from rabbitmq/mergify/bp/v3.13.x/pr-12116
rabbitmq-upgrade await_quorum_plus_one improvements (backport #12113) (backport #12116)
2 parents dce5ecd + 52e80b3 commit 52b3843

File tree

4 files changed

+69
-39
lines changed

4 files changed

+69
-39
lines changed

deps/rabbit/src/rabbit_upgrade_preparation.erl

+22-2
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
%%
1818

1919
-define(SAMPLING_INTERVAL, 200).
20+
-define(LOGGING_FREQUENCY, ?SAMPLING_INTERVAL * 100).
2021

2122
await_online_quorum_plus_one(Timeout) ->
2223
Iterations = ceil(Timeout / ?SAMPLING_INTERVAL),
@@ -38,7 +39,11 @@ online_members(Component) ->
3839
erlang, whereis, [Component])).
3940

4041
endangered_critical_components() ->
41-
CriticalComponents = [rabbit_stream_coordinator],
42+
CriticalComponents = [rabbit_stream_coordinator] ++
43+
case rabbit_feature_flags:is_enabled(khepri_db) of
44+
true -> [rabbitmq_metadata];
45+
false -> []
46+
end,
4247
Nodes = rabbit_nodes:list_members(),
4348
lists:filter(fun (Component) ->
4449
NumAlive = length(online_members(Component)),
@@ -65,6 +70,21 @@ do_await_safe_online_quorum(IterationsLeft) ->
6570
case EndangeredQueues =:= [] andalso endangered_critical_components() =:= [] of
6671
true -> true;
6772
false ->
73+
case IterationsLeft rem ?LOGGING_FREQUENCY of
74+
0 ->
75+
case length(EndangeredQueues) of
76+
0 -> ok;
77+
N -> rabbit_log:info("Waiting for ~ts queues and streams to have quorum+1 replicas online."
78+
"You can list them with `rabbitmq-diagnostics check_if_node_is_quorum_critical`", [N])
79+
end,
80+
case endangered_critical_components() of
81+
[] -> ok;
82+
_ -> rabbit_log:info("Waiting for the following critical components to have quorum+1 replicas online: ~p.",
83+
[endangered_critical_components()])
84+
end;
85+
_ ->
86+
ok
87+
end,
6888
timer:sleep(?SAMPLING_INTERVAL),
6989
do_await_safe_online_quorum(IterationsLeft - 1)
7090
end.
@@ -89,6 +109,6 @@ list_with_minimum_quorum_for_cli() ->
89109
[#{
90110
<<"readable_name">> => C,
91111
<<"name">> => C,
92-
<<"virtual_host">> => "-",
112+
<<"virtual_host">> => <<"(not applicable)">>,
93113
<<"type">> => process
94114
} || C <- endangered_critical_components()].

deps/rabbit/test/upgrade_preparation_SUITE.erl

+36-29
Original file line number | Diff line number | Diff line change
@@ -15,20 +15,16 @@
1515

1616
all() ->
1717
[
18-
{group, quorum_queue},
19-
{group, stream}
18+
{group, clustered}
2019
].
2120

2221
groups() ->
2322
[
24-
{quorum_queue, [], [
25-
await_quorum_plus_one_qq
26-
]},
27-
{stream, [], [
28-
await_quorum_plus_one_stream
29-
]},
30-
{stream_coordinator, [], [
31-
await_quorum_plus_one_stream_coordinator
23+
{clustered, [], [
24+
await_quorum_plus_one_qq,
25+
await_quorum_plus_one_stream,
26+
await_quorum_plus_one_stream_coordinator,
27+
await_quorum_plus_one_rabbitmq_metadata
3228
]}
3329
].
3430

@@ -45,31 +41,30 @@ end_per_suite(Config) ->
4541
rabbit_ct_helpers:run_teardown_steps(Config).
4642

4743
init_per_group(Group, Config) ->
48-
case rabbit_ct_helpers:is_mixed_versions() of
49-
true ->
50-
%% in a 3.8/3.9 mixed cluster, ra will not cluster across versions,
51-
%% so quorum plus one will not be achieved
52-
{skip, "not mixed versions compatible"};
53-
_ ->
54-
Config1 = rabbit_ct_helpers:set_config(Config,
55-
[
56-
{rmq_nodes_count, 3},
57-
{rmq_nodename_suffix, Group}
58-
]),
59-
rabbit_ct_helpers:run_steps(Config1,
60-
rabbit_ct_broker_helpers:setup_steps() ++
61-
rabbit_ct_client_helpers:setup_steps())
62-
end.
44+
Config1 = rabbit_ct_helpers:set_config(Config,
45+
[
46+
{rmq_nodes_count, 3},
47+
{rmq_nodename_suffix, Group}
48+
]),
49+
rabbit_ct_helpers:run_steps(Config1,
50+
rabbit_ct_broker_helpers:setup_steps() ++
51+
rabbit_ct_client_helpers:setup_steps()).
6352

6453
end_per_group(_Group, Config) ->
6554
rabbit_ct_helpers:run_steps(Config,
6655
rabbit_ct_client_helpers:teardown_steps() ++
6756
rabbit_ct_broker_helpers:teardown_steps()).
6857

6958

70-
init_per_testcase(TestCase, Config) ->
71-
rabbit_ct_helpers:testcase_started(Config, TestCase),
72-
Config.
59+
init_per_testcase(Testcase, Config) when Testcase == await_quorum_plus_one_rabbitmq_metadata ->
60+
case rabbit_ct_helpers:is_mixed_versions() of
61+
true ->
62+
{skip, "not mixed versions compatible"};
63+
_ ->
64+
rabbit_ct_helpers:testcase_started(Config, Testcase)
65+
end;
66+
init_per_testcase(Testcase, Config) ->
67+
rabbit_ct_helpers:testcase_started(Config, Testcase).
7368

7469
end_per_testcase(TestCase, Config) ->
7570
rabbit_ct_helpers:testcase_finished(Config, TestCase).
@@ -121,12 +116,24 @@ await_quorum_plus_one_stream_coordinator(Config) ->
121116
%% no queues/streams beyond this point
122117

123118
ok = rabbit_ct_broker_helpers:stop_node(Config, B),
124-
%% this should fail because the corrdinator has only 2 running nodes
119+
%% this should fail because the coordinator has only 2 running nodes
125120
?assertNot(await_quorum_plus_one(Config, 0)),
126121

127122
ok = rabbit_ct_broker_helpers:start_node(Config, B),
128123
?assert(await_quorum_plus_one(Config, 0)).
129124

125+
await_quorum_plus_one_rabbitmq_metadata(Config) ->
126+
Nodes = [A, B, _C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
127+
ok = rabbit_ct_broker_helpers:enable_feature_flag(Config, Nodes, khepri_db),
128+
?assert(await_quorum_plus_one(Config, A)),
129+
130+
ok = rabbit_ct_broker_helpers:stop_node(Config, B),
131+
%% this should fail because rabbitmq_metadata has only 2 running nodes
132+
?assertNot(await_quorum_plus_one(Config, A)),
133+
134+
ok = rabbit_ct_broker_helpers:start_node(Config, B),
135+
?assert(await_quorum_plus_one(Config, A)).
136+
130137
%%
131138
%% Implementation
132139
%%

deps/rabbitmq_cli/lib/rabbitmq/cli/streams/commands/add_replica_command.ex

+6-6
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,10 @@ defmodule RabbitMQ.CLI.Streams.Commands.AddReplicaCommand do
2525
to_atom(node)
2626
]) do
2727
{:error, :classic_queue_not_supported} ->
28-
{:error, "Cannot add replicas to a classic queue"}
28+
{:error, "Cannot add replicas to classic queues"}
2929

3030
{:error, :quorum_queue_not_supported} ->
31-
{:error, "Cannot add replicas to a quorum queue"}
31+
{:error, "Cannot add replicas to quorum queues"}
3232

3333
other ->
3434
other
@@ -37,11 +37,11 @@ defmodule RabbitMQ.CLI.Streams.Commands.AddReplicaCommand do
3737

3838
use RabbitMQ.CLI.DefaultOutput
3939

40-
def usage, do: "add_replica [--vhost <vhost>] <queue> <node>"
40+
def usage, do: "add_replica [--vhost <vhost>] <stream> <node>"
4141

4242
def usage_additional do
4343
[
44-
["<queue>", "stream queue name"],
44+
["<queue>", "stream name"],
4545
["<node>", "node to add a new replica on"]
4646
]
4747
end
@@ -54,11 +54,11 @@ defmodule RabbitMQ.CLI.Streams.Commands.AddReplicaCommand do
5454

5555
def help_section, do: :replication
5656

57-
def description, do: "Adds a stream queue replica on the given node."
57+
def description, do: "Adds a stream replica on the given node"
5858

5959
def banner([name, node], _) do
6060
[
61-
"Adding a replica for queue #{name} on node #{node}..."
61+
"Adding a replica for stream #{name} on node #{node}..."
6262
]
6363
end
6464
end

deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl

+5-2
Original file line number | Diff line number | Diff line change
@@ -223,8 +223,11 @@ is_quorum_critical_test(Config) ->
223223
Body = http_get_failed(Config, "/health/checks/node-is-quorum-critical"),
224224
?assertEqual(<<"failed">>, maps:get(<<"status">>, Body)),
225225
?assertEqual(true, maps:is_key(<<"reason">>, Body)),
226-
[Queue] = maps:get(<<"queues">>, Body),
227-
?assertEqual(QName, maps:get(<<"name">>, Queue)),
226+
Queues = maps:get(<<"queues">>, Body),
227+
?assert(lists:any(
228+
fun(Item) ->
229+
QName =:= maps:get(<<"name">>, Item)
230+
end, Queues)),
228231

229232
passed.
230233

0 commit comments

Comments
 (0)